[llvm] r222039 - Reapply r221924: "[GVN] Perform Scalar PRE on gep indices that feed loads before
Manman Ren
mren at apple.com
Tue Nov 18 14:39:21 PST 2014
Hi Chad,
About the buildbot phase 2 failure, I now think this commit is the root cause.
What I did was:
1> update the repo to r222239, then "svn merge -c -r222039 .”, made a release+assert build (installed to clang-install).
2> use clang-install binary and library to build the same repo with lto.
make -j 4 VERBOSE=1 CLANG_REPOSITORY_STRING=clang-Rlto_master_build SVN_REVISION=222059 DYLD_LIBRARY_PATH=/Users/manmanren/llvm_gmail/clang-install/lib/
3> the build completed
make[1]: Nothing to be done for `all'.
make[1]: Nothing to be done for `all'.
llvm[0]: ***** Completed Release Build
Is it okay for us to revert this commit and watch the bot? If the bot is still red, we can re-submit it.
If you want me to do more testing (maybe a lto self host with r22239), let me know,
Thanks,
Manman (The build czar)
> On Nov 14, 2014, at 1:09 PM, Chad Rosier <mcrosier at codeaurora.org> wrote:
>
> Author: mcrosier
> Date: Fri Nov 14 15:09:13 2014
> New Revision: 222039
>
> URL: http://llvm.org/viewvc/llvm-project?rev=222039&view=rev
> Log:
> Reapply r221924: "[GVN] Perform Scalar PRE on gep indices that feed loads before
> doing Load PRE"
>
> This commit updates the failing test in
> Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
>
> The failing test is sensitive to the order in which we process loads. This
> version turns on the RPO traversal instead of the while DT traversal in GVN.
> The new test code is functionally same just the order of loads that are
> eliminated is swapped.
>
> This new version also fixes an issue where GVN splits a critical edge and
> potentially invalidate the RPO/DT iterator.
>
> Added:
> llvm/trunk/test/Transforms/GVN/pre-gep-load.ll
> Modified:
> llvm/trunk/lib/Transforms/Scalar/GVN.cpp
> llvm/trunk/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
>
> Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=222039&r1=222038&r2=222039&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Fri Nov 14 15:09:13 2014
> @@ -20,6 +20,7 @@
> #include "llvm/ADT/DepthFirstIterator.h"
> #include "llvm/ADT/Hashing.h"
> #include "llvm/ADT/MapVector.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> #include "llvm/ADT/SetVector.h"
> #include "llvm/ADT/SmallPtrSet.h"
> #include "llvm/ADT/Statistic.h"
> @@ -709,6 +710,7 @@ namespace {
> void dump(DenseMap<uint32_t, Value*> &d);
> bool iterateOnFunction(Function &F);
> bool performPRE(Function &F);
> + bool performScalarPRE(Instruction *I);
> Value *findLeader(const BasicBlock *BB, uint32_t num);
> void cleanupGlobalSets();
> void verifyRemoved(const Instruction *I) const;
> @@ -1729,6 +1731,15 @@ bool GVN::processNonLocalLoad(LoadInst *
> return false;
> }
>
> + // If this load follows a GEP, see if we can PRE the indices before analyzing.
> + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
> + for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
> + OE = GEP->idx_end();
> + OI != OE; ++OI)
> + if (Instruction *I = dyn_cast<Instruction>(OI->get()))
> + performScalarPRE(I);
> + }
> +
> // Step 2: Analyze the availability of the load
> AvailValInBlkVect ValuesPerBlock;
> UnavailBlkVect UnavailableBlocks;
> @@ -2431,175 +2442,182 @@ bool GVN::processBlock(BasicBlock *BB) {
> return ChangedFunction;
> }
>
> -/// performPRE - Perform a purely local form of PRE that looks for diamond
> -/// control flow patterns and attempts to perform simple PRE at the join point.
> -bool GVN::performPRE(Function &F) {
> - bool Changed = false;
> +bool GVN::performScalarPRE(Instruction *CurInst) {
> SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
> - for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
> - // Nothing to PRE in the entry block.
> - if (CurrentBlock == &F.getEntryBlock()) continue;
>
> - // Don't perform PRE on a landing pad.
> - if (CurrentBlock->isLandingPad()) continue;
> + if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
> + isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
> + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
> + isa<DbgInfoIntrinsic>(CurInst))
> + return false;
>
> - for (BasicBlock::iterator BI = CurrentBlock->begin(),
> - BE = CurrentBlock->end(); BI != BE; ) {
> - Instruction *CurInst = BI++;
> + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
> + // sinking the compare again, and it would force the code generator to
> + // move the i1 from processor flags or predicate registers into a general
> + // purpose register.
> + if (isa<CmpInst>(CurInst))
> + return false;
>
> - if (isa<AllocaInst>(CurInst) ||
> - isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
> - CurInst->getType()->isVoidTy() ||
> - CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
> - isa<DbgInfoIntrinsic>(CurInst))
> - continue;
> + // We don't currently value number ANY inline asm calls.
> + if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
> + if (CallI->isInlineAsm())
> + return false;
>
> - // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
> - // sinking the compare again, and it would force the code generator to
> - // move the i1 from processor flags or predicate registers into a general
> - // purpose register.
> - if (isa<CmpInst>(CurInst))
> - continue;
> + uint32_t ValNo = VN.lookup(CurInst);
>
> - // We don't currently value number ANY inline asm calls.
> - if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
> - if (CallI->isInlineAsm())
> - continue;
> + // Look for the predecessors for PRE opportunities. We're
> + // only trying to solve the basic diamond case, where
> + // a value is computed in the successor and one predecessor,
> + // but not the other. We also explicitly disallow cases
> + // where the successor is its own predecessor, because they're
> + // more complicated to get right.
> + unsigned NumWith = 0;
> + unsigned NumWithout = 0;
> + BasicBlock *PREPred = nullptr;
> + BasicBlock *CurrentBlock = CurInst->getParent();
> + predMap.clear();
> +
> + for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
> + PI != PE; ++PI) {
> + BasicBlock *P = *PI;
> + // We're not interested in PRE where the block is its
> + // own predecessor, or in blocks with predecessors
> + // that are not reachable.
> + if (P == CurrentBlock) {
> + NumWithout = 2;
> + break;
> + } else if (!DT->isReachableFromEntry(P)) {
> + NumWithout = 2;
> + break;
> + }
>
> - uint32_t ValNo = VN.lookup(CurInst);
> + Value *predV = findLeader(P, ValNo);
> + if (!predV) {
> + predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
> + PREPred = P;
> + ++NumWithout;
> + } else if (predV == CurInst) {
> + /* CurInst dominates this predecessor. */
> + NumWithout = 2;
> + break;
> + } else {
> + predMap.push_back(std::make_pair(predV, P));
> + ++NumWith;
> + }
> + }
>
> - // Look for the predecessors for PRE opportunities. We're
> - // only trying to solve the basic diamond case, where
> - // a value is computed in the successor and one predecessor,
> - // but not the other. We also explicitly disallow cases
> - // where the successor is its own predecessor, because they're
> - // more complicated to get right.
> - unsigned NumWith = 0;
> - unsigned NumWithout = 0;
> - BasicBlock *PREPred = nullptr;
> - predMap.clear();
> -
> - for (pred_iterator PI = pred_begin(CurrentBlock),
> - PE = pred_end(CurrentBlock); PI != PE; ++PI) {
> - BasicBlock *P = *PI;
> - // We're not interested in PRE where the block is its
> - // own predecessor, or in blocks with predecessors
> - // that are not reachable.
> - if (P == CurrentBlock) {
> - NumWithout = 2;
> - break;
> - } else if (!DT->isReachableFromEntry(P)) {
> - NumWithout = 2;
> - break;
> - }
> + // Don't do PRE when it might increase code size, i.e. when
> + // we would need to insert instructions in more than one pred.
> + if (NumWithout != 1 || NumWith == 0)
> + return false;
>
> - Value* predV = findLeader(P, ValNo);
> - if (!predV) {
> - predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
> - PREPred = P;
> - ++NumWithout;
> - } else if (predV == CurInst) {
> - /* CurInst dominates this predecessor. */
> - NumWithout = 2;
> - break;
> - } else {
> - predMap.push_back(std::make_pair(predV, P));
> - ++NumWith;
> - }
> - }
> + // Don't do PRE across indirect branch.
> + if (isa<IndirectBrInst>(PREPred->getTerminator()))
> + return false;
>
> - // Don't do PRE when it might increase code size, i.e. when
> - // we would need to insert instructions in more than one pred.
> - if (NumWithout != 1 || NumWith == 0)
> - continue;
> + // We can't do PRE safely on a critical edge, so instead we schedule
> + // the edge to be split and perform the PRE the next time we iterate
> + // on the function.
> + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
> + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
> + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
> + return false;
> + }
>
> - // Don't do PRE across indirect branch.
> - if (isa<IndirectBrInst>(PREPred->getTerminator()))
> - continue;
> + // Instantiate the expression in the predecessor that lacked it.
> + // Because we are going top-down through the block, all value numbers
> + // will be available in the predecessor by the time we need them. Any
> + // that weren't originally present will have been instantiated earlier
> + // in this loop.
> + Instruction *PREInstr = CurInst->clone();
> + bool success = true;
> + for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
> + Value *Op = PREInstr->getOperand(i);
> + if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
> + continue;
>
> - // We can't do PRE safely on a critical edge, so instead we schedule
> - // the edge to be split and perform the PRE the next time we iterate
> - // on the function.
> - unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
> - if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
> - toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
> - continue;
> - }
> + if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
> + PREInstr->setOperand(i, V);
> + } else {
> + success = false;
> + break;
> + }
> + }
>
> - // Instantiate the expression in the predecessor that lacked it.
> - // Because we are going top-down through the block, all value numbers
> - // will be available in the predecessor by the time we need them. Any
> - // that weren't originally present will have been instantiated earlier
> - // in this loop.
> - Instruction *PREInstr = CurInst->clone();
> - bool success = true;
> - for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
> - Value *Op = PREInstr->getOperand(i);
> - if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
> - continue;
> + // Fail out if we encounter an operand that is not available in
> + // the PRE predecessor. This is typically because of loads which
> + // are not value numbered precisely.
> + if (!success) {
> + DEBUG(verifyRemoved(PREInstr));
> + delete PREInstr;
> + return false;
> + }
>
> - if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
> - PREInstr->setOperand(i, V);
> - } else {
> - success = false;
> - break;
> - }
> - }
> + PREInstr->insertBefore(PREPred->getTerminator());
> + PREInstr->setName(CurInst->getName() + ".pre");
> + PREInstr->setDebugLoc(CurInst->getDebugLoc());
> + VN.add(PREInstr, ValNo);
> + ++NumGVNPRE;
> +
> + // Update the availability map to include the new instruction.
> + addToLeaderTable(ValNo, PREInstr, PREPred);
> +
> + // Create a PHI to make the value available in this block.
> + PHINode *Phi =
> + PHINode::Create(CurInst->getType(), predMap.size(),
> + CurInst->getName() + ".pre-phi", CurrentBlock->begin());
> + for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
> + if (Value *V = predMap[i].first)
> + Phi->addIncoming(V, predMap[i].second);
> + else
> + Phi->addIncoming(PREInstr, PREPred);
> + }
>
> - // Fail out if we encounter an operand that is not available in
> - // the PRE predecessor. This is typically because of loads which
> - // are not value numbered precisely.
> - if (!success) {
> - DEBUG(verifyRemoved(PREInstr));
> - delete PREInstr;
> - continue;
> - }
> + VN.add(Phi, ValNo);
> + addToLeaderTable(ValNo, Phi, CurrentBlock);
> + Phi->setDebugLoc(CurInst->getDebugLoc());
> + CurInst->replaceAllUsesWith(Phi);
> + if (Phi->getType()->getScalarType()->isPointerTy()) {
> + // Because we have added a PHI-use of the pointer value, it has now
> + // "escaped" from alias analysis' perspective. We need to inform
> + // AA of this.
> + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
> + unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
> + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
> + }
>
> - PREInstr->insertBefore(PREPred->getTerminator());
> - PREInstr->setName(CurInst->getName() + ".pre");
> - PREInstr->setDebugLoc(CurInst->getDebugLoc());
> - VN.add(PREInstr, ValNo);
> - ++NumGVNPRE;
> -
> - // Update the availability map to include the new instruction.
> - addToLeaderTable(ValNo, PREInstr, PREPred);
> -
> - // Create a PHI to make the value available in this block.
> - PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
> - CurInst->getName() + ".pre-phi",
> - CurrentBlock->begin());
> - for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
> - if (Value *V = predMap[i].first)
> - Phi->addIncoming(V, predMap[i].second);
> - else
> - Phi->addIncoming(PREInstr, PREPred);
> - }
> -
> - VN.add(Phi, ValNo);
> - addToLeaderTable(ValNo, Phi, CurrentBlock);
> - Phi->setDebugLoc(CurInst->getDebugLoc());
> - CurInst->replaceAllUsesWith(Phi);
> - if (Phi->getType()->getScalarType()->isPointerTy()) {
> - // Because we have added a PHI-use of the pointer value, it has now
> - // "escaped" from alias analysis' perspective. We need to inform
> - // AA of this.
> - for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
> - ++ii) {
> - unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
> - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
> - }
> + if (MD)
> + MD->invalidateCachedPointerInfo(Phi);
> + }
> + VN.erase(CurInst);
> + removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
>
> - if (MD)
> - MD->invalidateCachedPointerInfo(Phi);
> - }
> - VN.erase(CurInst);
> - removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
> + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
> + if (MD)
> + MD->removeInstruction(CurInst);
> + DEBUG(verifyRemoved(CurInst));
> + CurInst->eraseFromParent();
> + return true;
> +}
> +
> +/// performPRE - Perform a purely local form of PRE that looks for diamond
> +/// control flow patterns and attempts to perform simple PRE at the join point.
> +bool GVN::performPRE(Function &F) {
> + bool Changed = false;
> + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
> + // Nothing to PRE in the entry block.
> + if (CurrentBlock == &F.getEntryBlock())
> + continue;
>
> - DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
> - if (MD) MD->removeInstruction(CurInst);
> - DEBUG(verifyRemoved(CurInst));
> - CurInst->eraseFromParent();
> - Changed = true;
> + // Don't perform PRE on a landing pad.
> + if (CurrentBlock->isLandingPad())
> + continue;
> +
> + for (BasicBlock::iterator BI = CurrentBlock->begin(),
> + BE = CurrentBlock->end();
> + BI != BE;) {
> + Instruction *CurInst = BI++;
> + Changed = performScalarPRE(CurInst);
> }
> }
>
> @@ -2637,25 +2655,21 @@ bool GVN::iterateOnFunction(Function &F)
>
> // Top-down walk of the dominator tree
> bool Changed = false;
> -#if 0
> - // Needed for value numbering with phi construction to work.
> - ReversePostOrderTraversal<Function*> RPOT(&F);
> - for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
> - RE = RPOT.end(); RI != RE; ++RI)
> - Changed |= processBlock(*RI);
> -#else
> // Save the blocks this function have before transformation begins. GVN may
> // split critical edge, and hence may invalidate the RPO/DT iterator.
> //
> std::vector<BasicBlock *> BBVect;
> BBVect.reserve(256);
> - for (DomTreeNode *X : depth_first(DT->getRootNode()))
> - BBVect.push_back(X->getBlock());
> + // Needed for value numbering with phi construction to work.
> + ReversePostOrderTraversal<Function *> RPOT(&F);
> + for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
> + RE = RPOT.end();
> + RI != RE; ++RI)
> + BBVect.push_back(*RI);
>
> for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
> I != E; I++)
> Changed |= processBlock(*I);
> -#endif
>
> return Changed;
> }
>
> Modified: llvm/trunk/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll?rev=222039&r1=222038&r2=222039&view=diff
> ==============================================================================
> --- llvm/trunk/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll (original)
> +++ llvm/trunk/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll Fri Nov 14 15:09:13 2014
> @@ -46,12 +46,12 @@ entry:
> br i1 %c, label %if.else, label %if.then
>
> if.then:
> - %t = load i32* %p, !tbaa !4
> + %t = load i32* %p, !tbaa !3
> store i32 %t, i32* %q
> ret void
>
> if.else:
> - %u = load i32* %p, !tbaa !3
> + %u = load i32* %p, !tbaa !4
> store i32 %u, i32* %q
> ret void
> }
> @@ -61,11 +61,11 @@ if.else:
>
> ; CHECK: @watch_out_for_another_type_change
> ; CHECK: if.then:
> -; CHECK: %t = load i32* %p
> -; CHECK: store i32 %t, i32* %q
> +; CHECK: store i32 0, i32* %q
> ; CHECK: ret void
> ; CHECK: if.else:
> -; CHECK: store i32 0, i32* %q
> +; CHECK: %u = load i32* %p
> +; CHECK: store i32 %u, i32* %q
>
> define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
> entry:
> @@ -74,12 +74,12 @@ entry:
> br i1 %c, label %if.else, label %if.then
>
> if.then:
> - %t = load i32* %p, !tbaa !3
> + %t = load i32* %p, !tbaa !4
> store i32 %t, i32* %q
> ret void
>
> if.else:
> - %u = load i32* %p, !tbaa !4
> + %u = load i32* %p, !tbaa !3
> store i32 %u, i32* %q
> ret void
> }
>
> Added: llvm/trunk/test/Transforms/GVN/pre-gep-load.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/pre-gep-load.ll?rev=222039&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/GVN/pre-gep-load.ll (added)
> +++ llvm/trunk/test/Transforms/GVN/pre-gep-load.ll Fri Nov 14 15:09:13 2014
> @@ -0,0 +1,49 @@
> +; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
> +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
> +target triple = "aarch64--linux-gnu"
> +
> +define double @foo(i32 %stat, i32 %i, double** %p) {
> +; CHECK-LABEL: @foo(
> +entry:
> + switch i32 %stat, label %sw.default [
> + i32 0, label %sw.bb
> + i32 1, label %sw.bb
> + i32 2, label %sw.bb2
> + ]
> +
> +sw.bb: ; preds = %entry, %entry
> + %idxprom = sext i32 %i to i64
> + %arrayidx = getelementptr inbounds double** %p, i64 0
> + %0 = load double** %arrayidx, align 8
> + %arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom
> + %1 = load double* %arrayidx1, align 8
> + %sub = fsub double %1, 1.000000e+00
> + %cmp = fcmp olt double %sub, 0.000000e+00
> + br i1 %cmp, label %if.then, label %if.end
> +
> +if.then: ; preds = %sw.bb
> + br label %return
> +
> +if.end: ; preds = %sw.bb
> + br label %sw.bb2
> +
> +sw.bb2: ; preds = %if.end, %entry
> + %idxprom3 = sext i32 %i to i64
> + %arrayidx4 = getelementptr inbounds double** %p, i64 0
> + %2 = load double** %arrayidx4, align 8
> + %arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3
> + %3 = load double* %arrayidx5, align 8
> +; CHECK: sw.bb2:
> +; CHECK-NEXT-NOT: sext
> +; CHECK-NEXT: phi double [
> +; CHECK-NOT: load
> + %sub6 = fsub double 3.000000e+00, %3
> + br label %return
> +
> +sw.default: ; preds = %entry
> + br label %return
> +
> +return: ; preds = %sw.default, %sw.bb2, %if.then
> + %retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
> + ret double %retval.0
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list