r198640 - CodeGen: Initial instrumentation based PGO implementation

NAKAMURA Takumi geek4civic at gmail.com
Mon Jan 6 17:08:01 PST 2014


Please don't match basic-block labels such as "if.else" in the test. Labels may not be emitted in builds without assertions (-Asserts).

I suppressed a test failure in r198651. Could you please fix
CodeGenCXX/instr-profile.cpp?

2014/1/7 Justin Bogner <mail at justinbogner.com>:
> Author: bogner
> Date: Mon Jan  6 16:27:43 2014
> New Revision: 198640
>
> URL: http://llvm.org/viewvc/llvm-project?rev=198640&view=rev
> Log:
> CodeGen: Initial instrumentation based PGO implementation
>
> Added:
>     cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
>     cfe/trunk/lib/CodeGen/CodeGenPGO.h
>     cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
>     cfe/trunk/test/CodeGen/instr-profile.c
>     cfe/trunk/test/CodeGenCXX/Inputs/
>     cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
>     cfe/trunk/test/CodeGenCXX/instr-profile.cpp
> Modified:
>     cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
>     cfe/trunk/lib/CodeGen/CGCall.cpp
>     cfe/trunk/lib/CodeGen/CGException.cpp
>     cfe/trunk/lib/CodeGen/CGExpr.cpp
>     cfe/trunk/lib/CodeGen/CGExprAgg.cpp
>     cfe/trunk/lib/CodeGen/CGExprComplex.cpp
>     cfe/trunk/lib/CodeGen/CGExprScalar.cpp
>     cfe/trunk/lib/CodeGen/CGObjC.cpp
>     cfe/trunk/lib/CodeGen/CGStmt.cpp
>     cfe/trunk/lib/CodeGen/CMakeLists.txt
>     cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
>     cfe/trunk/lib/CodeGen/CodeGenFunction.h
>     cfe/trunk/lib/CodeGen/CodeGenModule.cpp
>     cfe/trunk/lib/CodeGen/CodeGenModule.h
>
> Modified: cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp Mon Jan  6 16:27:43 2014
> @@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCall
>    llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
>
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
> -  CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
> +  CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
> +                           /*TrueCount=*/0);
>
>    eval.begin(CGF);
>    CGF.EmitBlock(ConfigOKBlock);
>
> Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jan  6 16:27:43 2014
> @@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntim
>      call->setCallingConv(getRuntimeCC());
>      Builder.CreateUnreachable();
>    }
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  /// Emits a call or invoke instruction to the given nullary runtime
>
> Modified: cfe/trunk/lib/CodeGen/CGException.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGException.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGException.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGException.cpp Mon Jan  6 16:27:43 2014
> @@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(con
>      // Initialize the catch variable and set up the cleanups.
>      BeginCatch(*this, C);
>
> +    // Emit the PGO counter increment
> +    RegionCounter CatchCnt = getPGORegionCounter(C);
> +    CatchCnt.beginRegion(Builder);
> +
>      // Perform the body of the catch.
>      EmitStmt(C->getHandlerBlock());
>
> @@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(con
>        Builder.CreateBr(ContBB);
>    }
>
> +  RegionCounter ContCnt = getPGORegionCounter(&S);
>    EmitBlock(ContBB);
> +  ContCnt.beginRegion(Builder);
>  }
>
>  namespace {
>
> Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Mon Jan  6 16:27:43 2014
> @@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const Abst
>    }
>
>    OpaqueValueMapping binding(*this, expr);
> +  RegionCounter Cnt = getPGORegionCounter(expr);
>
>    const Expr *condExpr = expr->getCond();
>    bool CondExprBool;
> @@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const Abst
>      const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
>      if (!CondExprBool) std::swap(live, dead);
>
> -    if (!ContainsLabel(dead))
> +    if (!ContainsLabel(dead)) {
> +      // If the true case is live, we need to track its region
> +      if (CondExprBool)
> +        Cnt.beginRegion(Builder);
>        return EmitLValue(live);
> +    }
>    }
>
>    llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
> @@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const Abst
>    llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
>
>    ConditionalEvaluation eval(*this);
> -  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
> +  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
>
>    // Any temporaries created here are conditional.
>    EmitBlock(lhsBlock);
> +  Cnt.beginRegion(Builder);
>    eval.begin(*this);
>    LValue lhs = EmitLValue(expr->getTrueExpr());
>    eval.end(*this);
> +  Cnt.adjustFallThroughCount();
>
>    if (!lhs.isSimple())
>      return EmitUnsupportedLValue(expr, "conditional operator");
> @@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const Abst
>
>    // Any temporaries created here are conditional.
>    EmitBlock(rhsBlock);
> +  Cnt.beginElseRegion();
>    eval.begin(*this);
>    LValue rhs = EmitLValue(expr->getFalseExpr());
>    eval.end(*this);
> +  Cnt.adjustFallThroughCount();
>    if (!rhs.isSimple())
>      return EmitUnsupportedLValue(expr, "conditional operator");
>    rhsBlock = Builder.GetInsertBlock();
>
>    EmitBlock(contBlock);
> +  Cnt.applyAdjustmentsToRegion();
>
>    llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2,
>                                           "cond-lvalue");
>
> Modified: cfe/trunk/lib/CodeGen/CGExprAgg.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprAgg.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprAgg.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprAgg.cpp Mon Jan  6 16:27:43 2014
> @@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const A
>    // Bind the common expression if necessary.
>    CodeGenFunction::OpaqueValueMapping binding(CGF, E);
>
> +  RegionCounter Cnt = CGF.getPGORegionCounter(E);
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
> -  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
> +  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
>
>    // Save whether the destination's lifetime is externally managed.
>    bool isExternallyDestructed = Dest.isExternallyDestructed();
>
>    eval.begin(CGF);
>    CGF.EmitBlock(LHSBlock);
> +  Cnt.beginRegion(Builder);
>    Visit(E->getTrueExpr());
> +  Cnt.adjustFallThroughCount();
>    eval.end(CGF);
>
>    assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
> @@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const A
>
>    eval.begin(CGF);
>    CGF.EmitBlock(RHSBlock);
> +  Cnt.beginElseRegion();
>    Visit(E->getFalseExpr());
> +  Cnt.adjustFallThroughCount();
>    eval.end(CGF);
>
>    CGF.EmitBlock(ContBlock);
> +  Cnt.applyAdjustmentsToRegion();
>  }
>
>  void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {
>
> Modified: cfe/trunk/lib/CodeGen/CGExprComplex.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprComplex.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprComplex.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprComplex.cpp Mon Jan  6 16:27:43 2014
> @@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const A
>    // Bind the common expression if necessary.
>    CodeGenFunction::OpaqueValueMapping binding(CGF, E);
>
> +  RegionCounter Cnt = CGF.getPGORegionCounter(E);
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
> -  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
> +  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
>
>    eval.begin(CGF);
>    CGF.EmitBlock(LHSBlock);
> +  Cnt.beginRegion(Builder);
>    ComplexPairTy LHS = Visit(E->getTrueExpr());
> +  Cnt.adjustFallThroughCount();
>    LHSBlock = Builder.GetInsertBlock();
>    CGF.EmitBranch(ContBlock);
>    eval.end(CGF);
>
>    eval.begin(CGF);
>    CGF.EmitBlock(RHSBlock);
> +  Cnt.beginElseRegion();
>    ComplexPairTy RHS = Visit(E->getFalseExpr());
> +  Cnt.adjustFallThroughCount();
>    RHSBlock = Builder.GetInsertBlock();
>    CGF.EmitBlock(ContBlock);
>    eval.end(CGF);
> +  Cnt.applyAdjustmentsToRegion();
>
>    // Create a PHI node for the real part.
>    llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r");
>
> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Mon Jan  6 16:27:43 2014
> @@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign
>  }
>
>  Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
> +  RegionCounter Cnt = CGF.getPGORegionCounter(E);
> +
>    // Perform vector logical and on comparisons with zero vectors.
>    if (E->getType()->isVectorType()) {
> +    Cnt.beginRegion(Builder);
> +
>      Value *LHS = Visit(E->getLHS());
>      Value *RHS = Visit(E->getRHS());
>      Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
> @@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
>    bool LHSCondVal;
>    if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
>      if (LHSCondVal) { // If we have 1 && X, just emit X.
> +      Cnt.beginRegion(Builder);
> +
>        Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
>        // ZExt result to int or bool.
>        return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
> @@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
>
>    // Branch on the LHS first.  If it is false, go to the failure (cont) block.
> -  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
> +  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
>
>    // Any edges into the ContBlock are now from an (indeterminate number of)
>    // edges from this first condition.  All of these values will be false.  Start
> @@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
>
>    eval.begin(CGF);
>    CGF.EmitBlock(RHSBlock);
> +  Cnt.beginRegion(Builder);
>    Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> +  Cnt.adjustFallThroughCount();
>    eval.end(CGF);
>
>    // Reaquire the RHS block, as there may be subblocks inserted.
> @@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
>      Builder.SetCurrentDebugLocation(llvm::DebugLoc());
>    CGF.EmitBlock(ContBlock);
>    PN->addIncoming(RHSCond, RHSBlock);
> +  Cnt.applyAdjustmentsToRegion();
>
>    // ZExt result to int.
>    return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
>  }
>
>  Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
> +  RegionCounter Cnt = CGF.getPGORegionCounter(E);
> +
>    // Perform vector logical or on comparisons with zero vectors.
>    if (E->getType()->isVectorType()) {
> +    Cnt.beginRegion(Builder);
> +
>      Value *LHS = Visit(E->getLHS());
>      Value *RHS = Visit(E->getRHS());
>      Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
> @@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
>    bool LHSCondVal;
>    if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
>      if (!LHSCondVal) { // If we have 0 || X, just emit X.
> +      Cnt.beginRegion(Builder);
> +
>        Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
>        // ZExt result to int or bool.
>        return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
> @@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
>
>    // Branch on the LHS first.  If it is true, go to the success (cont) block.
> -  CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
> +  CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
> +                           Cnt.getParentCount() - Cnt.getCount());
>
>    // Any edges into the ContBlock are now from an (indeterminate number of)
>    // edges from this first condition.  All of these values will be true.  Start
> @@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(co
>
>    // Emit the RHS condition as a bool value.
>    CGF.EmitBlock(RHSBlock);
> +  Cnt.beginRegion(Builder);
>    Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> +  Cnt.adjustFallThroughCount();
>
>    eval.end(CGF);
>
> @@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(co
>    // into the phi node for the edge with the value of RHSCond.
>    CGF.EmitBlock(ContBlock);
>    PN->addIncoming(RHSCond, RHSBlock);
> +  Cnt.applyAdjustmentsToRegion();
>
>    // ZExt result to int.
>    return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext");
> @@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const A
>
>    // Bind the common expression if necessary.
>    CodeGenFunction::OpaqueValueMapping binding(CGF, E);
> +  RegionCounter Cnt = CGF.getPGORegionCounter(E);
>
>    Expr *condExpr = E->getCond();
>    Expr *lhsExpr = E->getTrueExpr();
> @@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const A
>
>      // If the dead side doesn't have labels we need, just emit the Live part.
>      if (!CGF.ContainsLabel(dead)) {
> +      if (CondExprBool)
> +        Cnt.beginRegion(Builder);
>        Value *Result = Visit(live);
>
>        // If the live part is a throw expression, it acts like it has a void
> @@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const A
>    // the select function.
>    if (CGF.getLangOpts().OpenCL
>        && condExpr->getType()->isVectorType()) {
> +    Cnt.beginRegion(Builder);
> +
>      llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
>      llvm::Value *LHS = Visit(lhsExpr);
>      llvm::Value *RHS = Visit(rhsExpr);
> @@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const A
>    // safe to evaluate the LHS and RHS unconditionally.
>    if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
>        isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
> +    Cnt.beginRegion(Builder);
> +
>      llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
>      llvm::Value *LHS = Visit(lhsExpr);
>      llvm::Value *RHS = Visit(rhsExpr);
> @@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const A
>    llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
>
>    CodeGenFunction::ConditionalEvaluation eval(CGF);
> -  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
> +  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
>
>    CGF.EmitBlock(LHSBlock);
> +  Cnt.beginRegion(Builder);
>    eval.begin(CGF);
>    Value *LHS = Visit(lhsExpr);
>    eval.end(CGF);
> +  Cnt.adjustFallThroughCount();
>
>    LHSBlock = Builder.GetInsertBlock();
>    Builder.CreateBr(ContBlock);
>
>    CGF.EmitBlock(RHSBlock);
> +  Cnt.beginElseRegion();
>    eval.begin(CGF);
>    Value *RHS = Visit(rhsExpr);
>    eval.end(CGF);
> +  Cnt.adjustFallThroughCount();
>
>    RHSBlock = Builder.GetInsertBlock();
>    CGF.EmitBlock(ContBlock);
> +  Cnt.applyAdjustmentsToRegion();
>
>    // If the LHS or RHS is a throw expression, it will be legitimately null.
>    if (!LHS)
>
> Modified: cfe/trunk/lib/CodeGen/CGObjC.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGObjC.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGObjC.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGObjC.cpp Mon Jan  6 16:27:43 2014
> @@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollect
>    llvm::Value *initialMutations =
>      Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations");
>
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +
>    // Start looping.  This is the point we return to whenever we have a
>    // fresh, non-empty batch of objects.
>    llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody");
>    EmitBlock(LoopBodyBB);
> +  Cnt.beginRegion(Builder);
>
>    // The current index into the buffer.
>    llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index");
> @@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollect
>      EmitAutoVarCleanups(variable);
>
>    // Perform the loop body, setting up break and continue labels.
> -  BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody));
> +  BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt));
>    {
>      RunCleanupsScope Scope(*this);
>      EmitStmt(S.getBody());
> @@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollect
>    llvm::Value *indexPlusOne
>      = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
>
> +  // TODO: We should probably model this as a "continue" for PGO
>    // If we haven't overrun the buffer yet, we can continue.
>    Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
>                         LoopBodyBB, FetchMoreBB);
> @@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollect
>    index->addIncoming(zero, Builder.GetInsertBlock());
>    count->addIncoming(refetchCount, Builder.GetInsertBlock());
>
> +  // TODO: We should be applying PGO weights here, but this needs to handle the
> +  // branch before FetchMoreBB or we risk getting the numbers wrong.
>    Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero),
>                         EmptyBB, LoopBodyBB);
>
> @@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollect
>      PopCleanupBlock();
>
>    EmitBlock(LoopEnd.getBlock());
> +  // TODO: Once we calculate PGO weights above, set the region count here
>  }
>
>  void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) {
>
> Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGStmt.cpp Mon Jan  6 16:27:43 2014
> @@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const La
>      ResolveBranchFixups(Dest.getBlock());
>    }
>
> +  RegionCounter Cnt = getPGORegionCounter(D->getStmt());
>    EmitBlock(Dest.getBlock());
> +  Cnt.beginRegion(Builder);
>  }
>
>  /// Change the cleanup scope of the labels in this lexical scope to
> @@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const
>      EmitStopPoint(&S);
>
>    EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
> +  PGO.setCurrentRegionCount(0);
>  }
>
>
>  void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
>    if (const LabelDecl *Target = S.getConstantTarget()) {
>      EmitBranchThroughCleanup(getJumpDestForLabel(Target));
> +    PGO.setCurrentRegionCount(0);
>      return;
>    }
>
> @@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoSt
>    cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
>
>    EmitBranch(IndGotoBB);
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
>    // C99 6.8.4.1: The first substatement is executed if the expression compares
>    // unequal to 0.  The condition must be a scalar type.
>    LexicalScope ConditionScope(*this, S.getSourceRange());
> +  RegionCounter Cnt = getPGORegionCounter(&S);
>
>    if (S.getConditionVariable())
>      EmitAutoVarDecl(*S.getConditionVariable());
> @@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const I
>      // If the skipped block has no labels in it, just emit the executed block.
>      // This avoids emitting dead code and simplifies the CFG substantially.
>      if (!ContainsLabel(Skipped)) {
> +      if (CondConstant)
> +        Cnt.beginRegion(Builder);
>        if (Executed) {
>          RunCleanupsScope ExecutedScope(*this);
>          EmitStmt(Executed);
> @@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const I
>    llvm::BasicBlock *ElseBlock = ContBlock;
>    if (S.getElse())
>      ElseBlock = createBasicBlock("if.else");
> -  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
> +
> +  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
>
>    // Emit the 'then' code.
> -  EmitBlock(ThenBlock);
> +  EmitBlock(ThenBlock);
> +  Cnt.beginRegion(Builder);
>    {
>      RunCleanupsScope ThenScope(*this);
>      EmitStmt(S.getThen());
>    }
> +  Cnt.adjustFallThroughCount();
>    EmitBranch(ContBlock);
>
>    // Emit the 'else' code if present.
> @@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const I
>      if (getDebugInfo())
>        Builder.SetCurrentDebugLocation(llvm::DebugLoc());
>      EmitBlock(ElseBlock);
> +    Cnt.beginElseRegion();
>      {
>        RunCleanupsScope ElseScope(*this);
>        EmitStmt(Else);
>      }
> +    Cnt.adjustFallThroughCount();
>      // There is no need to emit line number for unconditional branch.
>      if (getDebugInfo())
>        Builder.SetCurrentDebugLocation(llvm::DebugLoc());
> @@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const I
>
>    // Emit the continuation block for code after the if.
>    EmitBlock(ContBlock, true);
> +  Cnt.applyAdjustmentsToRegion();
>  }
>
>  void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +
>    // Emit the header for the loop, which will also become
>    // the continue target.
>    JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
> @@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(cons
>    JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
>
>    // Store the blocks to use for break and continue.
> -  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
> +  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt));
>
>    // C++ [stmt.while]p2:
>    //   When the condition of a while statement is a declaration, the
> @@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(cons
>    // while(1) is common, avoid extra exit blocks.  Be sure
>    // to correctly handle break/continue though.
>    bool EmitBoolCondBranch = true;
> +  llvm::BranchInst *CondBr = NULL;
>    if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
>      if (C->isOne())
>        EmitBoolCondBranch = false;
> @@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(cons
>      if (ConditionScope.requiresCleanups())
>        ExitBlock = createBasicBlock("while.exit");
>
> -    Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
> +    CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
>
>      if (ExitBlock != LoopExit.getBlock()) {
>        EmitBlock(ExitBlock);
> @@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(cons
>    {
>      RunCleanupsScope BodyScope(*this);
>      EmitBlock(LoopBody);
> +    Cnt.beginRegion(Builder);
>      EmitStmt(S.getBody());
>    }
> +  Cnt.adjustFallThroughCount();
>
>    BreakContinueStack.pop_back();
>
> +  uint64_t LoopCount = Cnt.getCount();
> +  uint64_t ExitCount = Cnt.getLoopExitCount();
> +  if (EmitBoolCondBranch)
> +    CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> +                        PGO.createBranchWeights(LoopCount, ExitCount));
> +
>    // Immediately force cleanup.
>    ConditionScope.ForceCleanup();
>
> @@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(cons
>
>    // Emit the exit block.
>    EmitBlock(LoopExit.getBlock(), true);
> +  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>
>    // The LoopHeader typically is just a branch if we skipped emitting
>    // a branch, try to erase it.
> @@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const D
>    JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
>    JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
>
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +
>    // Store the blocks to use for break and continue.
> -  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
> +  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt));
>
>    // Emit the body of the loop.
>    llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
>    EmitBlock(LoopBody);
> +  Cnt.beginRegion(Builder);
>    {
>      RunCleanupsScope BodyScope(*this);
>      EmitStmt(S.getBody());
>    }
> +  Cnt.adjustFallThroughCount();
>
>    BreakContinueStack.pop_back();
>
> @@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const D
>      if (C->isZero())
>        EmitBoolCondBranch = false;
>
> +  uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount();
> +  uint64_t ExitCount = Cnt.getLoopExitCount();
> +
>    // As long as the condition is true, iterate the loop.
> -  if (EmitBoolCondBranch)
> -    Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
> +  if (EmitBoolCondBranch) {
> +    Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
> +                         PGO.createBranchWeights(LoopCount, ExitCount));
> +  }
>
>    // Emit the exit block.
>    EmitBlock(LoopExit.getBlock());
> +  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>
>    // The DoCond block typically is just a branch if we skipped
>    // emitting a branch, try to erase it.
> @@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const D
>  }
>
>  void CodeGenFunction::EmitForStmt(const ForStmt &S) {
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +
>    JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
>
>    RunCleanupsScope ForScope(*this);
> @@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const
>    // Create a cleanup scope for the condition variable cleanups.
>    RunCleanupsScope ConditionScope(*this);
>
> +  llvm::BranchInst *CondBr = NULL;
>    if (S.getCond()) {
>      // If the for statement has a condition scope, emit the local variable
>      // declaration.
> @@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const
>      // C99 6.8.5p2/p4: The first substatement is executed if the expression
>      // compares unequal to 0.  The condition must be a scalar type.
>      llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
> -    Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
> +    CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
>
>      if (ExitBlock != LoopExit.getBlock()) {
>        EmitBlock(ExitBlock);
> @@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const
>      // Treat it as a non-zero constant.  Don't even create a new block for the
>      // body, just fall into it.
>    }
> +  Cnt.beginRegion(Builder);
>
>    // If the for loop doesn't have an increment we can just use the
>    // condition as the continue block.  Otherwise we'll need to create
> @@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const
>      Continue = getJumpDestInCurrentScope("for.inc");
>
>    // Store the blocks to use for break and continue.
> -  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
> +  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
>
>    {
>      // Create a separate cleanup scope for the body, in case it is not
> @@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const
>      EmitBlock(Continue.getBlock());
>      EmitStmt(S.getInc());
>    }
> +  Cnt.adjustFallThroughCount();
>
>    BreakContinueStack.pop_back();
>
> +  uint64_t LoopCount = Cnt.getCount();
> +  uint64_t ExitCount = Cnt.getLoopExitCount();
> +  if (S.getCond())
> +    CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> +                        PGO.createBranchWeights(LoopCount, ExitCount));
> +
>    ConditionScope.ForceCleanup();
>    EmitBranch(CondBlock);
>
> @@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const
>
>    // Emit the fall-through block.
>    EmitBlock(LoopExit.getBlock(), true);
> +  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>  }
>
>  void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +
>    JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
>
>    RunCleanupsScope ForScope(*this);
> @@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStm
>    // The body is executed if the expression, contextually converted
>    // to bool, is true.
>    llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
> -  Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
> +  llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal,
> +                                                  ForBody, ExitBlock);
>
>    if (ExitBlock != LoopExit.getBlock()) {
>      EmitBlock(ExitBlock);
> @@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStm
>    }
>
>    EmitBlock(ForBody);
> +  Cnt.beginRegion(Builder);
>
>    // Create a block for the increment. In case of a 'continue', we jump there.
>    JumpDest Continue = getJumpDestInCurrentScope("for.inc");
>
>    // Store the blocks to use for break and continue.
> -  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
> +  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
>
>    {
>      // Create a separate cleanup scope for the loop variable and body.
> @@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStm
>    // If there is an increment, emit it next.
>    EmitBlock(Continue.getBlock());
>    EmitStmt(S.getInc());
> +  Cnt.adjustFallThroughCount();
>
>    BreakContinueStack.pop_back();
>
> +  uint64_t LoopCount = Cnt.getCount();
> +  uint64_t ExitCount = Cnt.getLoopExitCount();
> +  CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> +                      PGO.createBranchWeights(LoopCount, ExitCount));
> +
>    EmitBranch(CondBlock);
>
>    ForScope.ForceCleanup();
> @@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStm
>
>    // Emit the fall-through block.
>    EmitBlock(LoopExit.getBlock(), true);
> +  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>  }
>
>  void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
> @@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue
>                         /*init*/ true);
>    }
>    EmitBranchThroughCleanup(ReturnBlock);
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  /// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
> @@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(con
>
>    cleanupScope.ForceCleanup();
>    EmitBranchThroughCleanup(ReturnBlock);
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
> @@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(cons
>    if (HaveInsertPoint())
>      EmitStopPoint(&S);
>
> -  JumpDest Block = BreakContinueStack.back().BreakBlock;
> -  EmitBranchThroughCleanup(Block);
> +  BreakContinue &BC = BreakContinueStack.back();
> +  // We keep track of breaks from the loop so we can differentiate them from
> +  // non-local exits in PGO instrumentation. This only applies to loops, not
> +  // breaks from switch statements.
> +  if (BC.CountBreak)
> +    BC.LoopCnt->getBreakCounter().beginRegion(Builder);
> +  EmitBranchThroughCleanup(BC.BreakBlock);
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
> @@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(c
>    if (HaveInsertPoint())
>      EmitStopPoint(&S);
>
> -  JumpDest Block = BreakContinueStack.back().ContinueBlock;
> -  EmitBranchThroughCleanup(Block);
> +  BreakContinue &BC = BreakContinueStack.back();
> +  // We keep track of continues in the loop so we can differentiate them from
> +  // non-local exits in PGO instrumentation.
> +  BC.LoopCnt->getContinueCounter().beginRegion(Builder);
> +  EmitBranchThroughCleanup(BC.ContinueBlock);
> +  PGO.setCurrentRegionCount(0);
>  }
>
>  /// EmitCaseStmtRange - If case statement range is not too big then
> @@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(
>    llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
>    llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
>
> +  RegionCounter CaseCnt = getPGORegionCounter(&S);
> +
>    // Emit the code for this case. We do this first to make sure it is
>    // properly chained from our predecessor before generating the
>    // switch machinery to enter this block.
>    EmitBlock(createBasicBlock("sw.bb"));
>    llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
> +  CaseCnt.beginRegion(Builder);
>    EmitStmt(S.getSubStmt());
>
>    // If range is empty, do nothing.
> @@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(
>    // FIXME: parameters such as this should not be hardcoded.
>    if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
>      // Range is small enough to add multiple switch instruction cases.
> -    for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
> +    uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount();
> +    unsigned NCases = Range.getZExtValue() + 1;
> +    // Divide the weights evenly between the cases, ensuring that the total
> +    // weight is preserved. I.e., a weight of 5 over three cases will be
> +    // distributed as weights of 2, 2, and 1.
> +    uint64_t Weight = Total / NCases, Rem = Total % NCases;
> +    for (unsigned I = 0; I != NCases; ++I) {
> +      if (SwitchWeights)
> +        SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
> +      if (Rem)
> +        Rem--;
>        SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
>        LHS++;
>      }
> @@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(
>      Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
>    llvm::Value *Cond =
>      Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
> -  Builder.CreateCondBr(Cond, CaseDest, FalseDest);
> +
> +  llvm::MDNode *Weights = 0;
> +  if (SwitchWeights) {
> +    uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount();
> +    uint64_t DefaultCount = (*SwitchWeights)[0];
> +    Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
> +
> +    // Since we're chaining the switch default through each large case range, we
> +    // need to update the weight for the default, i.e., the first case, to
> +    // include this case.
> +    (*SwitchWeights)[0] += ThisCount;
> +  }
> +  Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
>
>    // Restore the appropriate insertion point.
>    if (RestoreBB)
> @@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const
>      return;
>    }
>
> +  RegionCounter CaseCnt = getPGORegionCounter(&S);
>    llvm::ConstantInt *CaseVal =
>      Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
>
> -  // If the body of the case is just a 'break', and if there was no fallthrough,
> -  // try to not emit an empty block.
> -  if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
> +  // If the body of the case is just a 'break', try to not emit an empty block.
> +  // If we're profiling or we're not optimizing, leave the block in for better
> +  // debug and coverage analysis.
> +  if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
> +      CGM.getCodeGenOpts().OptimizationLevel > 0 &&
>        isa<BreakStmt>(S.getSubStmt())) {
>      JumpDest Block = BreakContinueStack.back().BreakBlock;
>
>      // Only do this optimization if there are no cleanups that need emitting.
>      if (isObviouslyBranchWithoutCleanups(Block)) {
> +      if (SwitchWeights)
> +        SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
>        SwitchInsn->addCase(CaseVal, Block.getBlock());
>
>        // If there was a fallthrough into this case, make sure to redirect it to
> @@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const
>
>    EmitBlock(createBasicBlock("sw.bb"));
>    llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
> +  if (SwitchWeights)
> +    SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
> +  CaseCnt.beginRegion(Builder);
>    SwitchInsn->addCase(CaseVal, CaseDest);
>
>    // Recursively emitting the statement is acceptable, but is not wonderful for
> @@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const
>    // Otherwise, iteratively add consecutive cases to this switch stmt.
>    while (NextCase && NextCase->getRHS() == 0) {
>      CurCase = NextCase;
> -    llvm::ConstantInt *CaseVal =
> +    llvm::ConstantInt *CaseVal =
>        Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
> +
> +    CaseCnt = getPGORegionCounter(NextCase);
> +    if (SwitchWeights)
> +      SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
> +    CaseCnt.beginRegion(Builder);
> +
>      SwitchInsn->addCase(CaseVal, CaseDest);
>      NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
>    }
> @@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(co
>    llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
>    assert(DefaultBlock->empty() &&
>           "EmitDefaultStmt: Default block already defined?");
> +
> +  llvm::BasicBlock *SkipCountBB = 0;
> +  if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
> +    // The PGO region here needs to count the number of times the edge occurs,
> +    // so fallthrough into this case will jump past the region counter to the
> +    // skipcount basic block.
> +    SkipCountBB = createBasicBlock("skipcount");
> +    EmitBranch(SkipCountBB);
> +  }
>    EmitBlock(DefaultBlock);
> +
> +  RegionCounter Cnt = getPGORegionCounter(&S);
> +  Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
> +
> +  if (SkipCountBB)
> +    EmitBlock(SkipCountBB);
>    EmitStmt(S.getSubStmt());
>  }
>
> @@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForC
>  static bool FindCaseStatementsForValue(const SwitchStmt &S,
>                                         const llvm::APSInt &ConstantCondValue,
>                                  SmallVectorImpl<const Stmt*> &ResultStmts,
> -                                       ASTContext &C) {
> +                                       ASTContext &C,
> +                                       const SwitchCase *&ResultCase) {
>    // First step, find the switch case that is being branched to.  We can do this
>    // efficiently by scanning the SwitchCase list.
>    const SwitchCase *Case = S.getSwitchCaseList();
> @@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(c
>    //   while (1) {
>    //     case 4: ...
>    bool FoundCase = false;
> +  ResultCase = Case;
>    return CollectStatementsForCase(S.getBody(), Case, FoundCase,
>                                    ResultStmts) != CSFC_Failure &&
>           FoundCase;
> @@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(con
>
>    // Handle nested switch statements.
>    llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
> +  SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
>    llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
>
>    // See if we can constant fold the condition of the switch and therefore only
> @@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(con
>    llvm::APSInt ConstantCondValue;
>    if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
>      SmallVector<const Stmt*, 4> CaseStmts;
> +    const SwitchCase *Case = 0;
>      if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
> -                                   getContext())) {
> +                                   getContext(), Case)) {
> +      PGO.setCurrentRegionCount(0);
> +      if (Case) {
> +        RegionCounter CaseCnt = getPGORegionCounter(Case);
> +        CaseCnt.beginRegion(Builder);
> +      }
>        RunCleanupsScope ExecutedScope(*this);
>
>        // At this point, we are no longer "within" a switch instance, so
> @@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(con
>        // specified series of statements and we're good.
>        for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
>          EmitStmt(CaseStmts[i]);
> +      RegionCounter ExitCnt = getPGORegionCounter(&S);
> +      ExitCnt.beginRegion(Builder);
>
>        // Now we want to restore the saved switch instance so that nested
>        // switches continue to function properly
> @@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(con
>    // failure.
>    llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
>    SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
> +  if (PGO.haveRegionCounts()) {
> +    // Walk the SwitchCase list to find how many there are.
> +    uint64_t DefaultCount = 0;
> +    unsigned NumCases = 0;
> +    for (const SwitchCase *Case = S.getSwitchCaseList();
> +         Case;
> +         Case = Case->getNextSwitchCase()) {
> +      if (isa<DefaultStmt>(Case))
> +        DefaultCount = getPGORegionCounter(Case).getCount();
> +      NumCases += 1;
> +    }
> +    SwitchWeights = new SmallVector<uint64_t, 16>();
> +    SwitchWeights->reserve(NumCases);
> +    // The default needs to be first. We store the edge count, so we already
> +    // know the right weight.
> +    SwitchWeights->push_back(DefaultCount);
> +  }
>    CaseRangeBlock = DefaultBlock;
>
>    // Clear the insertion point to indicate we are in unreachable code.
>    Builder.ClearInsertionPoint();
> +  PGO.setCurrentRegionCount(0);
>
>    // All break statements jump to NextBlock. If BreakContinueStack is non-empty
> -  // then reuse last ContinueBlock.
> +  // then reuse last ContinueBlock and that block's counter.
>    JumpDest OuterContinue;
> -  if (!BreakContinueStack.empty())
> -    OuterContinue = BreakContinueStack.back().ContinueBlock;
> +  RegionCounter *OuterCount = 0;
> +  if (!BreakContinueStack.empty()) {
> +    BreakContinue &BC = BreakContinueStack.back();
> +    OuterContinue = BC.ContinueBlock;
> +    OuterCount = BC.LoopCnt;
> +  }
>
> -  BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
> +  BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue,
> +                                             OuterCount, /*CountBreak=*/false));
>
>    // Emit switch body.
>    EmitStmt(S.getBody());
> @@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(con
>
>    // Emit continuation.
>    EmitBlock(SwitchExit.getBlock(), true);
> +  RegionCounter ExitCnt = getPGORegionCounter(&S);
> +  ExitCnt.beginRegion(Builder);
>
> +  if (SwitchWeights) {
> +    assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
> +           "switch weights do not match switch cases");
> +    // If there's only one jump destination there's no sense weighting it.
> +    if (SwitchWeights->size() > 1)
> +      SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
> +                              PGO.createBranchWeights(*SwitchWeights));
> +    delete SwitchWeights;
> +  }
>    SwitchInsn = SavedSwitchInsn;
> +  SwitchWeights = SavedSwitchWeights;
>    CaseRangeBlock = SavedCRBlock;
>  }
>
>
> Modified: cfe/trunk/lib/CodeGen/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CMakeLists.txt?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CMakeLists.txt (original)
> +++ cfe/trunk/lib/CodeGen/CMakeLists.txt Mon Jan  6 16:27:43 2014
> @@ -53,6 +53,7 @@ add_clang_library(clangCodeGen
>    CodeGenModule.cpp
>    CodeGenTBAA.cpp
>    CodeGenTypes.cpp
> +  CodeGenPGO.cpp
>    ItaniumCXXABI.cpp
>    MicrosoftCXXABI.cpp
>    ModuleBuilder.cpp
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp Mon Jan  6 16:27:43 2014
> @@ -16,6 +16,7 @@
>  #include "CGCXXABI.h"
>  #include "CGDebugInfo.h"
>  #include "CodeGenModule.h"
> +#include "CodeGenPGO.h"
>  #include "TargetInfo.h"
>  #include "clang/AST/ASTContext.h"
>  #include "clang/AST/Decl.h"
> @@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGen
>        NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
>        ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
>        DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
> -      SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
> +      PGO(cgm), SwitchInsn(0), SwitchWeights(0),
> +      CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
>        NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
>        CXXThisValue(0), CXXDefaultInitExprThis(0),
>        CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
> @@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(Glob
>    if (CGM.getCodeGenOpts().InstrumentForProfiling)
>      EmitMCountInstrumentation();
>
> +  PGO.assignRegionCounters(GD);
> +
>    if (RetTy->isVoidType()) {
>      // Void type; nothing to return.
>      ReturnValue = 0;
> @@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(Glob
>
>  void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
>                                         const Stmt *Body) {
> +  RegionCounter Cnt = getPGORegionCounter(Body);
> +  Cnt.beginRegion(Builder);
>    if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
>      EmitCompoundStmtWithoutScope(*S);
>    else
> @@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(Globa
>    // a quick pass now to see if we can.
>    if (!CurFn->doesNotThrow())
>      TryMarkNoThrow(CurFn);
> +
> +  PGO.emitWriteoutFunction(CurGD);
> +  PGO.destroyRegionCounters();
>  }
>
>  /// ContainsLabel - Return true if the statement contains a label in it.  If
> @@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr
>  ///
>  void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
>                                             llvm::BasicBlock *TrueBlock,
> -                                           llvm::BasicBlock *FalseBlock) {
> +                                           llvm::BasicBlock *FalseBlock,
> +                                           uint64_t TrueCount) {
>    Cond = Cond->IgnoreParens();
>
>    if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
> +    RegionCounter Cnt = getPGORegionCounter(CondBOp);
> +
>      // Handle X && Y in a condition.
>      if (CondBOp->getOpcode() == BO_LAnd) {
>        // If we have "1 && X", simplify the code.  "0 && X" would have constant
> @@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
>        if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
>            ConstantBool) {
>          // br(1 && X) -> br(X).
> -        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> +        Cnt.beginRegion(Builder);
> +        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
> +                                    TrueCount);
>        }
>
>        // If we have "X && 1", simplify the code to use an uncond branch.
> @@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolEx
>        if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
>            ConstantBool) {
>          // br(X && 1) -> br(X).
> -        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
> +        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
> +                                    TrueCount);
>        }
>
>        // Emit the LHS as a conditional.  If the LHS conditional is false, we
>        // want to jump to the FalseBlock.
>        llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
> +      // The counter tells us how often we evaluate RHS, and all of TrueCount
> +      // can be propagated to that branch.
> +      uint64_t RHSCount = Cnt.getCount();
>
>        ConditionalEvaluation eval(*this);
> -      EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
> +      EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
>        EmitBlock(LHSTrue);
>
>        // Any temporaries created here are conditional.
> +      Cnt.beginRegion(Builder);
>        eval.begin(*this);
> -      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> +      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
>        eval.end(*this);
> +      Cnt.adjustFallThroughCount();
> +      Cnt.applyAdjustmentsToRegion();
>
>        return;
>      }
> @@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
>        if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
>            !ConstantBool) {
>          // br(0 || X) -> br(X).
> -        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> +        Cnt.beginRegion(Builder);
> +        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
> +                                    TrueCount);
>        }
>
>        // If we have "X || 0", simplify the code to use an uncond branch.
> @@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolEx
>        if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
>            !ConstantBool) {
>          // br(X || 0) -> br(X).
> -        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
> +        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
> +                                    TrueCount);
>        }
>
>        // Emit the LHS as a conditional.  If the LHS conditional is true, we
>        // want to jump to the TrueBlock.
>        llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
> +      // We have the count for entry to the RHS and for the whole expression
> +      // being true, so we can divvy up TrueCount between the short circuit and
> +      // the RHS.
> +      uint64_t LHSCount = TrueCount - Cnt.getCount();
> +      uint64_t RHSCount = TrueCount - LHSCount;
>
>        ConditionalEvaluation eval(*this);
> -      EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
> +      EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
>        EmitBlock(LHSFalse);
>
>        // Any temporaries created here are conditional.
> +      Cnt.beginRegion(Builder);
>        eval.begin(*this);
> -      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> +      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
> +
>        eval.end(*this);
> +      Cnt.adjustFallThroughCount();
> +      Cnt.applyAdjustmentsToRegion();
>
>        return;
>      }
> @@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolEx
>
>    if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
>      // br(!x, t, f) -> br(x, f, t)
> -    if (CondUOp->getOpcode() == UO_LNot)
> -      return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
> +    if (CondUOp->getOpcode() == UO_LNot) {
> +      // Negate the count.
> +      uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
> +      // Negate the condition and swap the destination blocks.
> +      return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
> +                                  FalseCount);
> +    }
>    }
>
>    if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
> @@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolEx
>      llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
>      llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
>
> +    RegionCounter Cnt = getPGORegionCounter(CondOp);
>      ConditionalEvaluation cond(*this);
> -    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
> +    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
> +
> +    // When computing PGO branch weights, we only know the overall count for
> +    // the true block. This code is essentially doing tail duplication of the
> +    // naive code-gen, introducing new edges for which counts are not
> +    // available. Divide the counts proportionally between the LHS and RHS of
> +    // the conditional operator.
> +    uint64_t LHSScaledTrueCount = 0;
> +    if (TrueCount) {
> +      double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount();
> +      LHSScaledTrueCount = TrueCount * LHSRatio;
> +    }
>
>      cond.begin(*this);
>      EmitBlock(LHSBlock);
> -    EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
> +    Cnt.beginRegion(Builder);
> +    EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
> +                         LHSScaledTrueCount);
>      cond.end(*this);
>
>      cond.begin(*this);
>      EmitBlock(RHSBlock);
> -    EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
> +    Cnt.beginElseRegion();
> +    EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
> +                         TrueCount - LHSScaledTrueCount);
>      cond.end(*this);
>
>      return;
> @@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolEx
>      return;
>    }
>
> +  // Create branch weights based on the number of times we get here and the
> +  // number of times the condition should be true.
> +  uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount);
> +  llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
> +                                                  CurrentCount - TrueCount);
> +
>    // Emit the code with the fully general case.
>    llvm::Value *CondV = EvaluateExprAsBool(Cond);
> -  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
> +  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
>  }
>
>  /// ErrorUnsupported - Print out an error that codegen doesn't support the
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Jan  6 16:27:43 2014
> @@ -19,6 +19,7 @@
>  #include "CGValue.h"
>  #include "EHScopeStack.h"
>  #include "CodeGenModule.h"
> +#include "CodeGenPGO.h"
>  #include "clang/AST/CharUnits.h"
>  #include "clang/AST/ExprCXX.h"
>  #include "clang/AST/ExprObjC.h"
> @@ -817,19 +818,36 @@ private:
>    llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
>
>    // BreakContinueStack - This keeps track of where break and continue
> -  // statements should jump to.
> +  // statements should jump to and the associated base counter for
> +  // instrumentation.
>    struct BreakContinue {
> -    BreakContinue(JumpDest Break, JumpDest Continue)
> -      : BreakBlock(Break), ContinueBlock(Continue) {}
> +    BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt,
> +                  bool CountBreak = true)
> +      : BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt),
> +        CountBreak(CountBreak) {}
>
>      JumpDest BreakBlock;
>      JumpDest ContinueBlock;
> +    RegionCounter *LoopCnt;
> +    bool CountBreak;
>    };
>    SmallVector<BreakContinue, 8> BreakContinueStack;
>
> +  CodeGenPGO PGO;
> +
> +public:
> +  /// Get a counter for instrumentation of the region associated with the given
> +  /// statement.
> +  RegionCounter getPGORegionCounter(const Stmt *S) {
> +    return RegionCounter(PGO, S);
> +  }
> +private:
> +
>    /// SwitchInsn - This is nearest current switch instruction. It is null if
>    /// current context is not in a switch.
>    llvm::SwitchInst *SwitchInsn;
> +  /// The branch weights of SwitchInsn when doing instrumentation based PGO.
> +  SmallVector<uint64_t, 16> *SwitchWeights;
>
>    /// CaseRangeBlock - This block holds if condition check for last case
>    /// statement range in current switch instruction.
> @@ -2413,8 +2431,10 @@ public:
>    /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
>    /// if statement) to the specified blocks.  Based on the condition, this might
>    /// try to simplify the codegen of the conditional based on the branch.
> +  /// TrueCount should be the number of times we expect the condition to
> +  /// evaluate to true based on PGO data.
>    void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
> -                            llvm::BasicBlock *FalseBlock);
> +                            llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
>
>    /// \brief Emit a description of a type in a format suitable for passing to
>    /// a runtime sanitizer handler.
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jan  6 16:27:43 2014
> @@ -20,6 +20,7 @@
>  #include "CGOpenCLRuntime.h"
>  #include "CodeGenFunction.h"
>  #include "CodeGenTBAA.h"
> +#include "CodeGenPGO.h"
>  #include "TargetInfo.h"
>  #include "clang/AST/ASTContext.h"
>  #include "clang/AST/CharUnits.h"
> @@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext
>        ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
>        TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
>        OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
> -      NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
> +      NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
> +      CFConstantStringClassRef(0),
>        ConstantStringClassRef(0), NSConstantStringType(0),
>        NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
>        BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
> @@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext
>    if (C.getLangOpts().ObjCAutoRefCount)
>      ARCData = new ARCEntrypoints();
>    RRData = new RREntrypoints();
> +
> +  if (!CodeGenOpts.InstrProfileInput.empty())
> +    PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
>  }
>
>  CodeGenModule::~CodeGenModule() {
> @@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDe
>      AddGlobalDtor(Fn, DA->getPriority());
>    if (D->hasAttr<AnnotateAttr>())
>      AddGlobalAnnotations(D, Fn);
> +
> +  llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
> +  if (PGOInit)
> +    AddGlobalCtor(PGOInit, 0);
>  }
>
>  void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenModule.h Mon Jan  6 16:27:43 2014
> @@ -85,7 +85,8 @@ namespace CodeGen {
>    class CGCUDARuntime;
>    class BlockFieldFlags;
>    class FunctionArgList;
> -
> +  class PGOProfileData;
> +
>    struct OrderGlobalInits {
>      unsigned int priority;
>      unsigned int lex_order;
> @@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenType
>    ARCEntrypoints *ARCData;
>    llvm::MDNode *NoObjCARCExceptionsMetadata;
>    RREntrypoints *RRData;
> +  PGOProfileData *PGOData;
>
>    // WeakRefReferences - A set of references that have only been seen via
>    // a weakref so far. This is used to remove the weak of the reference if we
> @@ -479,6 +481,10 @@ public:
>      return *RRData;
>    }
>
> +  PGOProfileData *getPGOData() const {
> +    return PGOData;
> +  }
> +
>    llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
>      return StaticLocalDeclMap[D];
>    }
>
> Added: cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.cpp?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenPGO.cpp (added)
> +++ cfe/trunk/lib/CodeGen/CodeGenPGO.cpp Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,456 @@
> +//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Instrumentation-based profile-guided optimization
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "CodeGenPGO.h"
> +#include "CodeGenFunction.h"
> +#include "clang/AST/RecursiveASTVisitor.h"
> +#include "clang/AST/StmtVisitor.h"
> +#include "llvm/IR/MDBuilder.h"
> +#include "llvm/Support/FileSystem.h"
> +
> +using namespace clang;
> +using namespace CodeGen;
> +
> +static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
> +  DiagnosticsEngine &Diags = CGM.getDiags();
> +  unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
> +  Diags.Report(DiagID);
> +}
> +
> +PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
> +  : CGM(CGM) {
> +  if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
> +    ReportBadPGOData(CGM, "failed to open pgo data file");
> +    return;
> +  }
> +
> +  if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
> +    ReportBadPGOData(CGM, "pgo data file too big");
> +    return;
> +  }
> +
> +  // Scan through the data file and map each function to the corresponding
> +  // file offset where its counts are stored.
> +  const char *BufferStart = DataBuffer->getBufferStart();
> +  const char *BufferEnd = DataBuffer->getBufferEnd();
> +  const char *CurPtr = BufferStart;
> +  while (CurPtr < BufferEnd) {
> +    // Read the mangled function name.
> +    const char *FuncName = CurPtr;
> +    // FIXME: Something will need to be added to distinguish static functions.
> +    CurPtr = strchr(CurPtr, ' ');
> +    if (!CurPtr) {
> +      ReportBadPGOData(CGM, "pgo data file has malformed function entry");
> +      return;
> +    }
> +    StringRef MangledName(FuncName, CurPtr - FuncName);
> +
> +    // Read the number of counters.
> +    char *EndPtr;
> +    unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
> +    if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
> +      ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
> +      return;
> +    }
> +    CurPtr = EndPtr;
> +
> +    // There is one line for each counter; skip over those lines.
> +    for (unsigned N = 0; N < NumCounters; ++N) {
> +      CurPtr = strchr(++CurPtr, '\n');
> +      if (!CurPtr) {
> +        ReportBadPGOData(CGM, "pgo data file is missing some counter info");
> +        return;
> +      }
> +    }
> +
> +    // Skip over the blank line separating functions.
> +    CurPtr += 2;
> +
> +    DataOffsets[MangledName] = FuncName - BufferStart;
> +  }
> +}
> +
> +bool PGOProfileData::getFunctionCounts(StringRef MangledName,
> +                                       std::vector<uint64_t> &Counts) {
> +  // Find the relevant section of the pgo-data file.
> +  llvm::StringMap<unsigned>::const_iterator OffsetIter =
> +    DataOffsets.find(MangledName);
> +  if (OffsetIter == DataOffsets.end())
> +    return true;
> +  const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
> +
> +  // Skip over the function name.
> +  CurPtr = strchr(CurPtr, ' ');
> +  assert(CurPtr && "pgo-data has corrupted function entry");
> +
> +  // Read the number of counters.
> +  char *EndPtr;
> +  unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
> +  assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
> +         "pgo-data file has corrupted number of counters");
> +  CurPtr = EndPtr;
> +
> +  Counts.reserve(NumCounters);
> +
> +  for (unsigned N = 0; N < NumCounters; ++N) {
> +    // Read the count value.
> +    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
> +    if (EndPtr == CurPtr || *EndPtr != '\n') {
> +      ReportBadPGOData(CGM, "pgo-data file has bad count value");
> +      return true;
> +    }
> +    Counts.push_back(Count);
> +    CurPtr = EndPtr + 1;
> +  }
> +
> +  // Make sure the number of counters matches up.
> +  if (Counts.size() != NumCounters) {
> +    ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
> +    return true;
> +  }
> +
> +  return false;
> +}
> +
> +void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
> +  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
> +    return;
> +
> +  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> +
> +  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
> +  llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
> +
> +  llvm::Function *WriteoutF =
> +    CGM.getModule().getFunction("__llvm_pgo_writeout");
> +  if (!WriteoutF) {
> +    llvm::FunctionType *WriteoutFTy =
> +      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
> +    WriteoutF = llvm::Function::Create(WriteoutFTy,
> +                                       llvm::GlobalValue::InternalLinkage,
> +                                       "__llvm_pgo_writeout", &CGM.getModule());
> +  }
> +  WriteoutF->setUnnamedAddr(true);
> +  WriteoutF->addFnAttr(llvm::Attribute::NoInline);
> +  if (CGM.getCodeGenOpts().DisableRedZone)
> +    WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);
> +
> +  llvm::BasicBlock *BB = WriteoutF->empty() ?
> +    llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();
> +
> +  CGBuilderTy PGOBuilder(BB);
> +
> +  llvm::Instruction *I = BB->getTerminator();
> +  if (!I)
> +    I = PGOBuilder.CreateRetVoid();
> +  PGOBuilder.SetInsertPoint(I);
> +
> +  llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
> +  llvm::Type *Args[] = {
> +    Int8PtrTy,                       // const char *MangledName
> +    Int32Ty,                         // uint32_t NumCounters
> +    Int64PtrTy                       // uint64_t *Counters
> +  };
> +  llvm::FunctionType *FTy =
> +    llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
> +  llvm::Constant *EmitFunc =
> +    CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);
> +
> +  llvm::Constant *MangledName =
> +    CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
> +  MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
> +  PGOBuilder.CreateCall3(EmitFunc, MangledName,
> +                         PGOBuilder.getInt32(NumRegionCounters),
> +                         PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
> +}
> +
> +llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
> +  llvm::Function *WriteoutF =
> +    CGM.getModule().getFunction("__llvm_pgo_writeout");
> +  if (!WriteoutF)
> +    return NULL;
> +
> +  // Create a small bit of code that registers the "__llvm_pgo_writeout" to
> +  // be executed at exit.
> +  llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
> +  if (F)
> +    return NULL;
> +
> +  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> +  llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
> +                                                    false);
> +  F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
> +                             "__llvm_pgo_init", &CGM.getModule());
> +  F->setUnnamedAddr(true);
> +  F->setLinkage(llvm::GlobalValue::InternalLinkage);
> +  F->addFnAttr(llvm::Attribute::NoInline);
> +  if (CGM.getCodeGenOpts().DisableRedZone)
> +    F->addFnAttr(llvm::Attribute::NoRedZone);
> +
> +  llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
> +  CGBuilderTy PGOBuilder(BB);
> +
> +  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
> +  llvm::Type *Params[] = {
> +    llvm::PointerType::get(FTy, 0)
> +  };
> +  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);
> +
> +  // Initialize the environment and register the local writeout function.
> +  llvm::Constant *PGOInit =
> +    CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
> +  PGOBuilder.CreateCall(PGOInit, WriteoutF);
> +  PGOBuilder.CreateRetVoid();
> +
> +  return F;
> +}
> +
> +namespace {
> +  /// A StmtVisitor that fills a map of statements to PGO counters.
> +  struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
> +    /// The next counter value to assign.
> +    unsigned NextCounter;
> +    /// The map of statements to counters.
> +    llvm::DenseMap<const Stmt*, unsigned> *CounterMap;
> +
> +    MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
> +      NextCounter(0), CounterMap(CounterMap) {
> +    }
> +
> +    void VisitChildren(const Stmt *S) {
> +      for (Stmt::const_child_range I = S->children(); I; ++I)
> +        if (*I)
> +         this->Visit(*I);
> +    }
> +    void VisitStmt(const Stmt *S) { VisitChildren(S); }
> +
> +    /// Assign a counter to track entry to the function body
> +    void VisitFunctionDecl(const FunctionDecl *S) {
> +      (*CounterMap)[S->getBody()] = NextCounter++;
> +      Visit(S->getBody());
> +    }
> +    /// Assign a counter to track the block following a label
> +    void VisitLabelStmt(const LabelStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getSubStmt());
> +    }
> +    /// Assign three counters - one for the body of the loop, one for breaks
> +    /// from the loop, and one for continues.
> +    ///
> +    /// The break and continue counters cover all such statements in this loop,
> +    /// and are used in calculations to find the number of times the condition
> +    /// and exit of the loop occur. They are needed so we can differentiate
> +    /// these statements from non-local exits like return and goto.
> +    void VisitWhileStmt(const WhileStmt *S) {
> +      (*CounterMap)[S] = NextCounter;
> +      NextCounter += 3;
> +      Visit(S->getCond());
> +      Visit(S->getBody());
> +    }
> +    /// Assign counters for the body of the loop, and for breaks and
> +    /// continues. See VisitWhileStmt.
> +    void VisitDoStmt(const DoStmt *S) {
> +      (*CounterMap)[S] = NextCounter;
> +      NextCounter += 3;
> +      Visit(S->getBody());
> +      Visit(S->getCond());
> +    }
> +    /// Assign counters for the body of the loop, and for breaks and
> +    /// continues. See VisitWhileStmt.
> +    void VisitForStmt(const ForStmt *S) {
> +      (*CounterMap)[S] = NextCounter;
> +      NextCounter += 3;
> +      const Expr *E;
> +      if ((E = S->getCond()))
> +        Visit(E);
> +      Visit(S->getBody());
> +      if ((E = S->getInc()))
> +        Visit(E);
> +    }
> +    /// Assign counters for the body of the loop, and for breaks and
> +    /// continues. See VisitWhileStmt.
> +    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
> +      (*CounterMap)[S] = NextCounter;
> +      NextCounter += 3;
> +      const Expr *E;
> +      if ((E = S->getCond()))
> +        Visit(E);
> +      Visit(S->getBody());
> +      if ((E = S->getInc()))
> +        Visit(E);
> +    }
> +    /// Assign counters for the body of the loop, and for breaks and
> +    /// continues. See VisitWhileStmt.
> +    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
> +      (*CounterMap)[S] = NextCounter;
> +      NextCounter += 3;
> +      Visit(S->getElement());
> +      Visit(S->getBody());
> +    }
> +    /// Assign a counter for the exit block of the switch statement.
> +    void VisitSwitchStmt(const SwitchStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getCond());
> +      Visit(S->getBody());
> +    }
> +    /// Assign a counter for a particular case in a switch. This counts jumps
> +    /// from the switch header as well as fallthrough from the case before this
> +    /// one.
> +    void VisitCaseStmt(const CaseStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getSubStmt());
> +    }
> +    /// Assign a counter for the default case of a switch statement. The count
> +    /// is the number of branches from the switch header to the default, and does
> +    /// not include fallthrough from previous cases. If we have multiple
> +    /// conditional branch blocks from the switch instruction to the default
> +    /// block, as with large GNU case ranges, this is the counter for the last
> +    /// edge in that series, rather than the first.
> +    void VisitDefaultStmt(const DefaultStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getSubStmt());
> +    }
> +    /// Assign a counter for the "then" part of an if statement. The count for
> +    /// the "else" part, if it exists, will be calculated from this counter.
> +    void VisitIfStmt(const IfStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getCond());
> +      Visit(S->getThen());
> +      if (S->getElse())
> +        Visit(S->getElse());
> +    }
> +    /// Assign a counter for the continuation block of a C++ try statement.
> +    void VisitCXXTryStmt(const CXXTryStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getTryBlock());
> +      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
> +        Visit(S->getHandler(I));
> +    }
> +    /// Assign a counter for a catch statement's handler block.
> +    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
> +      (*CounterMap)[S] = NextCounter++;
> +      Visit(S->getHandlerBlock());
> +    }
> +    /// Assign a counter for the "true" part of a conditional operator. The
> +    /// count in the "false" part will be calculated from this counter.
> +    void VisitConditionalOperator(const ConditionalOperator *E) {
> +      (*CounterMap)[E] = NextCounter++;
> +      Visit(E->getCond());
> +      Visit(E->getTrueExpr());
> +      Visit(E->getFalseExpr());
> +    }
> +    /// Assign a counter for the right hand side of a logical and operator.
> +    void VisitBinLAnd(const BinaryOperator *E) {
> +      (*CounterMap)[E] = NextCounter++;
> +      Visit(E->getLHS());
> +      Visit(E->getRHS());
> +    }
> +    /// Assign a counter for the right hand side of a logical or operator.
> +    void VisitBinLOr(const BinaryOperator *E) {
> +      (*CounterMap)[E] = NextCounter++;
> +      Visit(E->getLHS());
> +      Visit(E->getRHS());
> +    }
> +  };
> +}
> +
> +void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
> +  bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
> +  PGOProfileData *PGOData = CGM.getPGOData();
> +  if (!InstrumentRegions && !PGOData)
> +    return;
> +  const Decl *D = GD.getDecl();
> +  if (!D)
> +    return;
> +  mapRegionCounters(D);
> +  if (InstrumentRegions)
> +    emitCounterVariables();
> +  if (PGOData)
> +    loadRegionCounts(GD, PGOData);
> +}
> +
> +void CodeGenPGO::mapRegionCounters(const Decl *D) {
> +  RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
> +  MapRegionCounters Walker(RegionCounterMap);
> +  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
> +    Walker.VisitFunctionDecl(FD);
> +  NumRegionCounters = Walker.NextCounter;
> +}
> +
> +void CodeGenPGO::emitCounterVariables() {
> +  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> +  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
> +                                                    NumRegionCounters);
> +  RegionCounters =
> +    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
> +                             llvm::GlobalVariable::PrivateLinkage,
> +                             llvm::Constant::getNullValue(CounterTy),
> +                             "__llvm_pgo_ctr");
> +}
> +
> +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
> +  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
> +    return;
> +  llvm::Value *Addr =
> +    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
> +  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
> +  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
> +  Builder.CreateStore(Count, Addr);
> +}
> +
> +void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
> +  // For now, ignore the counts from the PGO data file only if the number of
> +  // counters does not match. This could be tightened down in the future to
> +  // ignore counts when the input changes in various ways, e.g., by comparing a
> +  // hash value based on some characteristics of the input.
> +  RegionCounts = new std::vector<uint64_t>();
> +  if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
> +      RegionCounts->size() != NumRegionCounters) {
> +    delete RegionCounts;
> +    RegionCounts = 0;
> +  }
> +}
> +
> +void CodeGenPGO::destroyRegionCounters() {
> +  if (RegionCounterMap != 0)
> +    delete RegionCounterMap;
> +  if (RegionCounts != 0)
> +    delete RegionCounts;
> +}
> +
> +llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
> +                                              uint64_t FalseCount) {
> +  if (!TrueCount && !FalseCount)
> +    return 0;
> +
> +  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> +  // TODO: need to scale down to 32-bits
> +  // According to Laplace's Rule of Succession, it is better to compute the
> +  // weight based on the count plus 1.
> +  return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
> +}
> +
> +llvm::MDNode *
> +CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
> +  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> +  // TODO: need to scale down to 32-bits, instead of just truncating.
> +  // According to Laplace's Rule of Succession, it is better to compute the
> +  // weight based on the count plus 1.
> +  SmallVector<uint32_t, 16> ScaledWeights;
> +  ScaledWeights.reserve(Weights.size());
> +  for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
> +       WI != WE; ++WI) {
> +    ScaledWeights.push_back(*WI + 1);
> +  }
> +  return MDHelper.createBranchWeights(ScaledWeights);
> +}
>
> Added: cfe/trunk/lib/CodeGen/CodeGenPGO.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.h?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenPGO.h (added)
> +++ cfe/trunk/lib/CodeGen/CodeGenPGO.h Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,216 @@
> +//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Instrumentation-based profile-guided optimization
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef CLANG_CODEGEN_CODEGENPGO_H
> +#define CLANG_CODEGEN_CODEGENPGO_H
> +
> +#include "CGBuilder.h"
> +#include "CodeGenModule.h"
> +#include "CodeGenTypes.h"
> +#include "clang/Frontend/CodeGenOptions.h"
> +#include "llvm/ADT/OwningPtr.h"
> +#include "llvm/ADT/StringMap.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +
> +namespace clang {
> +namespace CodeGen {
> +class RegionCounter;
> +
> +/// The raw counter data from an instrumented PGO binary
> +class PGOProfileData {
> +private:
> +  /// The PGO data
> +  llvm::OwningPtr<llvm::MemoryBuffer> DataBuffer;
> +  /// Offsets into DataBuffer for each function's counters
> +  llvm::StringMap<unsigned> DataOffsets;
> +  CodeGenModule &CGM;
> +public:
> +  PGOProfileData(CodeGenModule &CGM, std::string Path);
> +  /// Fill Counts with the profile data for the given function name. Returns
> +  /// false on success.
> +  bool getFunctionCounts(StringRef MangledName, std::vector<uint64_t> &Counts);
> +};
> +
> +/// Per-function PGO state. This class should generally not be used directly,
> +/// but instead through the CodeGenFunction and RegionCounter types.
> +class CodeGenPGO {
> +private:
> +  CodeGenModule &CGM;
> +
> +  unsigned NumRegionCounters;
> +  llvm::GlobalVariable *RegionCounters;
> +  llvm::DenseMap<const Stmt*, unsigned> *RegionCounterMap;
> +  std::vector<uint64_t> *RegionCounts;
> +  uint64_t CurrentRegionCount;
> +
> +public:
> +  CodeGenPGO(CodeGenModule &CGM)
> +    : CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0),
> +      RegionCounts(0), CurrentRegionCount(0) {}
> +  ~CodeGenPGO() {}
> +
> +  /// Whether or not we have PGO region data for the current function. This is
> +  /// false both when we have no data at all and when our data has been
> +  /// discarded.
> +  bool haveRegionCounts() const { return RegionCounts != 0; }
> +
> +  /// Return the counter value of the current region.
> +  uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
> +  /// Return the counter value of the current region, or \p Min if it is larger.
> +  uint64_t getCurrentRegionCountWithMin(uint64_t Min) {
> +    return std::max(Min, CurrentRegionCount);
> +  }
> +  /// Set the counter value for the current region. This is used to keep track
> +  /// of changes to the most recent counter from control flow and non-local
> +  /// exits.
> +  void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
> +
> +  /// Calculate branch weights appropriate for PGO data
> +  llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
> +  llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
> +
> +  /// Assign counters to regions and configure them for PGO of a given
> +  /// function. Does nothing if instrumentation is not enabled and either
> +  /// generates global variables or associates PGO data with each of the
> +  /// counters depending on whether we are generating or using instrumentation.
> +  void assignRegionCounters(GlobalDecl &GD);
> +  /// Emit code to write counts for a given function to disk, if necessary.
> +  void emitWriteoutFunction(GlobalDecl &GD);
> +  /// Clean up region counter state. Must be called if assignRegionCounters is
> +  /// used.
> +  void destroyRegionCounters();
> +  /// Emit the logic to register region counter write out functions. Returns a
> +  /// function that implements this logic.
> +  static llvm::Function *emitInitialization(CodeGenModule &CGM);
> +
> +private:
> +  void mapRegionCounters(const Decl *D);
> +  void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData);
> +  void emitCounterVariables();
> +
> +  /// Emit code to increment the counter at the given index
> +  void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
> +
> +  /// Return the region counter for the given statement. This should only be
> +  /// called on statements that have a dedicated counter.
> +  unsigned getRegionCounter(const Stmt *S) {
> +    if (RegionCounterMap == 0)
> +      return 0;
> +    return (*RegionCounterMap)[S];
> +  }
> +
> +  /// Return the region count for the counter at the given index.
> +  uint64_t getRegionCount(unsigned Counter) {
> +    if (!haveRegionCounts())
> +      return 0;
> +    return (*RegionCounts)[Counter];
> +  }
> +
> +  friend class RegionCounter;
> +};
> +
> +/// A counter for a particular region. This is the primary interface through
> +/// which clients manage PGO counters and their values.
> +class RegionCounter {
> +  CodeGenPGO *PGO;
> +  unsigned Counter;
> +  uint64_t Count;
> +  uint64_t ParentCount;
> +  uint64_t RegionCount;
> +  int64_t Adjust;
> +
> +  RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
> +    : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
> +      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
> +
> +public:
> +  RegionCounter(CodeGenPGO &PGO, const Stmt *S)
> +    : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
> +      Count(PGO.getRegionCount(Counter)),
> +      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
> +
> +  /// Get the value of the counter. In most cases this is the number of times
> +  /// the region of the counter was entered, but for switch labels it's the
> +  /// number of direct jumps to that label.
> +  uint64_t getCount() const { return Count; }
> +  /// Get the value of the counter with adjustments applied. Adjustments occur
> +  /// when control enters or leaves the region abnormally, ie, if there is a
> +  /// jump to a label within the region, or if the function can return from
> +  /// within the region. The adjusted count, then, is the value of the counter
> +  /// at the end of the region.
> +  uint64_t getAdjustedCount() const {
> +    assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count");
> +    return Count + Adjust;
> +  }
> +  /// Get the value of the counter in this region's parent, ie, the region that
> +  /// was active when this region began. This is useful for deriving counts in
> +  /// implicitly counted regions, like the false case of a condition or the
> +  /// normal exits of a loop.
> +  uint64_t getParentCount() const { return ParentCount; }
> +
> +  /// Get the number of times the condition of a loop will evaluate false. This
> +  /// is the number of times we enter the loop, adjusted by the difference
> +  /// between entering and exiting the loop body normally, excepting that
> +  /// 'continue' statements also bring us back here.
> +  ///
> +  /// Undefined if this counter is not counting a loop.
> +  uint64_t getLoopExitCount() const {
> +    return getParentCount() + getContinueCounter().getCount() +
> +      getAdjustedCount() - getCount();
> +  }
> +  /// Get the associated break counter. Undefined if this counter is not
> +  /// counting a loop.
> +  RegionCounter getBreakCounter() const {
> +    return RegionCounter(*PGO, Counter + 1);
> +  }
> +  /// Get the associated continue counter. Undefined if this counter is not
> +  /// counting a loop.
> +  RegionCounter getContinueCounter() const {
> +    return RegionCounter(*PGO, Counter + 2);
> +  }
> +
> +  /// Activate the counter by emitting an increment and starting to track
> +  /// adjustments. If AddIncomingFallThrough is true, the current region count
> +  /// will be added to the counter for the purposes of tracking the region.
> +  void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
> +    RegionCount = Count;
> +    if (AddIncomingFallThrough)
> +      RegionCount += PGO->getCurrentRegionCount();
> +    PGO->setCurrentRegionCount(RegionCount);
> +    PGO->emitCounterIncrement(Builder, Counter);
> +  }
> +  /// For counters on boolean branches, begins tracking adjustments for the
> +  /// uncounted path.
> +  void beginElseRegion() {
> +    RegionCount = ParentCount - Count;
> +    PGO->setCurrentRegionCount(RegionCount);
> +  }
> +
> +  /// Control may either enter or leave the region, so the count at the end may
> +  /// be different from the start. Call this to track that adjustment without
> +  /// modifying the current count. Must not be called before one of beginRegion
> +  /// or beginElseRegion.
> +  void adjustFallThroughCount() {
> +    Adjust += PGO->getCurrentRegionCount() - RegionCount;
> +  }
> +  /// Commit all adjustments to the current region. This should be called after
> +  /// all blocks that adjust the fallthrough count have been emitted.
> +  void applyAdjustmentsToRegion() {
> +    PGO->setCurrentRegionCount(ParentCount + Adjust);
> +  }
> +};
> +
> +}  // end namespace CodeGen
> +}  // end namespace clang
> +
> +#endif
>
> Added: cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata (added)
> +++ cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,127 @@
> +simple_loops 10
> +1
> +100
> +0
> +0
> +100
> +0
> +0
> +76
> +0
> +0
> +
> +conditionals 13
> +1
> +100
> +0
> +0
> +50
> +50
> +33
> +33
> +16
> +99
> +100
> +99
> +100
> +
> +early_exits 13
> +1
> +0
> +51
> +1
> +25
> +1
> +25
> +1
> +26
> +0
> +0
> +1
> +0
> +
> +jumps 30
> +1
> +1
> +0
> +0
> +0
> +1
> +0
> +0
> +0
> +0
> +1
> +0
> +1
> +2
> +3
> +2
> +0
> +0
> +0
> +3
> +0
> +1
> +1
> +1
> +10
> +0
> +0
> +0
> +10
> +9
> +
> +switches 21
> +1
> +1
> +1
> +15
> +0
> +7
> +7
> +1
> +0
> +3
> +2
> +3
> +3
> +4
> +4
> +0
> +4
> +4
> +5
> +1
> +0
> +
> +big_switch 19
> +1
> +32
> +0
> +0
> +32
> +1
> +0
> +2
> +1
> +11
> +11
> +1
> +1
> +15
> +15
> +1
> +1
> +2
> +2
> +
> +no_usable_data 5
> +1
> +1
> +1
> +1
> +1
> +
> +main 1
> +1
>
> Added: cfe/trunk/test/CodeGen/instr-profile.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/instr-profile.c?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/instr-profile.c (added)
> +++ cfe/trunk/test/CodeGen/instr-profile.c Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,428 @@
> +// Test that instrumentation based profiling feeds branch prediction
> +// correctly. This tests both generation of profile data and use of the same,
> +// and the input file for the -fprofile-instr-use case is expected to be the
> +// result of running the program generated by the -fprofile-instr-generate case
> +// (excepting no_usable_data). As such, main() should call every function in
> +// this test.
> +
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
> +
> +// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer
> +// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
> +// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
> +// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer
> +// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer
> +// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer
> +// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64]  zeroinitializer
> +// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64]  zeroinitializer
> +
> +// PGOGEN-LABEL: @simple_loops()
> +// PGOUSE-LABEL: @simple_loops()
> +// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0
> +void simple_loops() {
> +  int i;
> +  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]]
> +  for (i = 0; i < 100; ++i) {
> +  }
> +  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4
> +  // PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]]
> +  while (i > 0)
> +    i--;
> +  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7
> +  // PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]]
> +  do {} while (i++ < 75);
> +
> +  // PGOGEN-NOT: store {{.*}} @[[SLC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @conditionals()
> +// PGOUSE-LABEL: @conditionals()
> +// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 0
> +void conditionals() {
> +  // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[IF1:[0-9]+]]
> +  for (int i = 0; i < 100; ++i) {
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 4
> +    // PGOUSE: br {{.*}} !prof ![[IF2:[0-9]+]]
> +    if (i % 2) {
> +      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 5
> +      // PGOUSE: br {{.*}} !prof ![[IF3:[0-9]+]]
> +      if (i) {}
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 6
> +    // PGOUSE: br {{.*}} !prof ![[IF4:[0-9]+]]
> +    } else if (i % 3) {
> +      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 7
> +      // PGOUSE: br {{.*}} !prof ![[IF5:[0-9]+]]
> +      if (i) {}
> +    } else {
> +      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 8
> +      // PGOUSE: br {{.*}} !prof ![[IF6:[0-9]+]]
> +      if (i) {}
> +    }
> +
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 10
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 9
> +    // PGOUSE: br {{.*}} !prof ![[IF7:[0-9]+]]
> +    if (1 && i) {}
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 12
> +    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 11
> +    // PGOUSE: br {{.*}} !prof ![[IF8:[0-9]+]]
> +    if (0 || i) {}
> +  }
> +
> +  // PGOGEN-NOT: store {{.*}} @[[IFC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @early_exits()
> +// PGOUSE-LABEL: @early_exits()
> +// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 0
> +void early_exits() {
> +  int i = 0;
> +
> +  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[EE1:[0-9]+]]
> +  if (i) {}
> +
> +  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 2
> +  // PGOUSE: br {{.*}} !prof ![[EE2:[0-9]+]]
> +  while (i < 100) {
> +    i++;
> +    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 5
> +    // PGOUSE: br {{.*}} !prof ![[EE3:[0-9]+]]
> +    if (i > 50)
> +      // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 3
> +      break;
> +    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 6
> +    // PGOUSE: br {{.*}} !prof ![[EE4:[0-9]+]]
> +    if (i % 2)
> +      // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 4
> +      continue;
> +  }
> +
> +  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 7
> +  // PGOUSE: br {{.*}} !prof ![[EE5:[0-9]+]]
> +  if (i) {}
> +
> +  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 8
> +  do {
> +    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 11
> +    // PGOUSE: br {{.*}} !prof ![[EE6:[0-9]+]]
> +    if (i > 75)
> +      return;
> +    else
> +      i++;
> +  // PGOUSE: br {{.*}} !prof ![[EE7:[0-9]+]]
> +  } while (i < 100);
> +
> +  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 12
> +  // Never reached -> no weights
> +  if (i) {}
> +
> +  // PGOGEN-NOT: store {{.*}} @[[EEC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @jumps()
> +// PGOUSE-LABEL: @jumps()
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 0
> +void jumps() {
> +  int i;
> +
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[JM1:[0-9]+]]
> +  for (i = 0; i < 2; ++i) {
> +    goto outofloop;
> +    // Never reached -> no weights
> +    if (i) {}
> +  }
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 5
> +outofloop:
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 6
> +  // PGOUSE: br {{.*}} !prof ![[JM2:[0-9]+]]
> +  if (i) {}
> +
> +  goto loop1;
> +
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 7
> +  // PGOUSE: br {{.*}} !prof ![[JM3:[0-9]+]]
> +  while (i) {
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 10
> +  loop1:
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 11
> +    // PGOUSE: br {{.*}} !prof ![[JM4:[0-9]+]]
> +    if (i) {}
> +  }
> +
> +  goto loop2;
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 12
> +first:
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 13
> +second:
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 14
> +third:
> +  i++;
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 15
> +  // PGOUSE: br {{.*}} !prof ![[JM5:[0-9]+]]
> +  if (i < 3)
> +    goto loop2;
> +
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 16
> +  // PGOUSE: br {{.*}} !prof ![[JM6:[0-9]+]]
> +  while (i < 3) {
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 19
> +  loop2:
> +    // PGOUSE: switch {{.*}} [
> +    // PGOUSE: ], !prof ![[JM7:[0-9]+]]
> +    switch (i) {
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 21
> +    case 0:
> +      goto first;
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 22
> +    case 1:
> +      goto second;
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 23
> +    case 2:
> +      goto third;
> +    }
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 20
> +  }
> +
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 24
> +  // PGOUSE: br {{.*}} !prof ![[JM8:[0-9]+]]
> +  for (i = 0; i < 10; ++i) {
> +    goto withinloop;
> +    // Never reached -> no weights
> +    if (i) {}
> +  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 28
> +  withinloop:
> +    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 29
> +    // PGOUSE: br {{.*}} !prof ![[JM9:[0-9]+]]
> +    if (i) {}
> +  }
> +
> +  // PGOGEN-NOT: store {{.*}} @[[JMC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @switches()
> +// PGOUSE-LABEL: @switches()
> +// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 0
> +void switches() {
> +  static int weights[] = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5};
> +
> +  // No cases -> no weights
> +  switch (weights[0]) {
> +  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 2
> +  default:
> +    break;
> +  }
> +  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 1
> +
> +  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 3
> +  // PGOUSE: br {{.*}} !prof ![[SW1:[0-9]+]]
> +  for (int i = 0, len = sizeof(weights) / sizeof(weights[0]); i < len; ++i) {
> +    // PGOUSE: switch {{.*}} [
> +    // PGOUSE: ], !prof ![[SW2:[0-9]+]]
> +    switch (i[weights]) {
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 7
> +    case 1:
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 8
> +      // PGOUSE: br {{.*}} !prof ![[SW3:[0-9]+]]
> +      if (i) {}
> +      // fallthrough
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 9
> +    case 2:
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 10
> +      // PGOUSE: br {{.*}} !prof ![[SW4:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 11
> +    case 3:
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 12
> +      // PGOUSE: br {{.*}} !prof ![[SW5:[0-9]+]]
> +      if (i) {}
> +      continue;
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 13
> +    case 4:
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 14
> +      // PGOUSE: br {{.*}} !prof ![[SW6:[0-9]+]]
> +      if (i) {}
> +      // PGOUSE: switch {{.*}} [
> +      // PGOUSE: ], !prof ![[SW7:[0-9]+]]
> +      switch (i) {
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 16
> +      case 6 ... 9:
> +        // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 17
> +        // PGOUSE: br {{.*}} !prof ![[SW8:[0-9]+]]
> +        if (i) {}
> +        continue;
> +      }
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 15
> +
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 18
> +    default:
> +      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 19
> +      // PGOUSE: br {{.*}} !prof ![[SW9:[0-9]+]]
> +      if (i == len - 1)
> +        return;
> +    }
> +    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 6
> +  }
> +
> +  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 20
> +  // Never reached -> no weights
> +  if (weights[0]) {}
> +
> +  // PGOGEN-NOT: store {{.*}} @[[SWC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @big_switch()
> +// PGOUSE-LABEL: @big_switch()
> +// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 0
> +void big_switch() {
> +  // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[BS1:[0-9]+]]
> +  for (int i = 0; i < 32; ++i) {
> +    // PGOUSE: switch {{.*}} [
> +    // PGOUSE: ], !prof ![[BS2:[0-9]+]]
> +    switch (1 << i) {
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 5
> +    case (1 << 0):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 6
> +      // PGOUSE: br {{.*}} !prof ![[BS3:[0-9]+]]
> +      if (i) {}
> +      // fallthrough
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 7
> +    case (1 << 1):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 8
> +      // PGOUSE: br {{.*}} !prof ![[BS4:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 9
> +    case (1 << 2) ... (1 << 12):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 10
> +      // PGOUSE: br {{.*}} !prof ![[BS5:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // The branch for the large case range above appears after the case body
> +    // PGOUSE: br {{.*}} !prof ![[BS6:[0-9]+]]
> +
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 11
> +    case (1 << 13):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 12
> +      // PGOUSE: br {{.*}} !prof ![[BS7:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 13
> +    case (1 << 14) ... (1 << 28):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 14
> +      // PGOUSE: br {{.*}} !prof ![[BS8:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // The branch for the large case range above appears after the case body
> +    // PGOUSE: br {{.*}} !prof ![[BS9:[0-9]+]]
> +
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 15
> +    case (1 << 29) ... ((1 << 29) + 1):
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 16
> +      // PGOUSE: br {{.*}} !prof ![[BS10:[0-9]+]]
> +      if (i) {}
> +      break;
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 17
> +    default:
> +      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 18
> +      // PGOUSE: br {{.*}} !prof ![[BS11:[0-9]+]]
> +      if (i) {}
> +      break;
> +    }
> +    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 4
> +  }
> +
> +  // PGOGEN-NOT: store {{.*}} @[[BSC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +  // PGOUSE: ret void
> +}
> +
> +// PGOGEN-LABEL: @no_usable_data()
> +// PGOUSE-LABEL: @no_usable_data()
> +// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 0
> +void no_usable_data() {
> +  // The input data for PGOUSE is deliberately invalid for this function, so
> +  // that we can test that we reject and ignore it properly.
> +  int i = 0;
> +
> +  // PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 1
> +  if (i) {}
> +
> +  // PGOGEN-NOT: store {{.*}} @[[NOC]],
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOUSE-DAG: ![[SL1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[SL2]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[SL3]] = metadata !{metadata !"branch_weights", i32 76, i32 2}
> +
> +// PGOUSE-DAG: ![[EE1]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[EE2]] = metadata !{metadata !"branch_weights", i32 52, i32 1}
> +// PGOUSE-DAG: ![[EE3]] = metadata !{metadata !"branch_weights", i32 2, i32 51}
> +// PGOUSE-DAG: ![[EE4]] = metadata !{metadata !"branch_weights", i32 26, i32 26}
> +// PGOUSE-DAG: ![[EE5]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[EE6]] = metadata !{metadata !"branch_weights", i32 2, i32 26}
> +// PGOUSE-DAG: ![[EE7]] = metadata !{metadata !"branch_weights", i32 26, i32 1}
> +
> +// PGOUSE-DAG: ![[IF1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[IF2]] = metadata !{metadata !"branch_weights", i32 51, i32 51}
> +// PGOUSE-DAG: ![[IF3]] = metadata !{metadata !"branch_weights", i32 51, i32 1}
> +// PGOUSE-DAG: ![[IF4]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
> +// PGOUSE-DAG: ![[IF5]] = metadata !{metadata !"branch_weights", i32 34, i32 1}
> +// PGOUSE-DAG: ![[IF6]] = metadata !{metadata !"branch_weights", i32 17, i32 2}
> +// PGOUSE-DAG: ![[IF7]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
> +// PGOUSE-DAG: ![[IF8]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
> +
> +// PGOUSE-DAG: ![[JM1]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[JM2]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM4]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM5]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
> +// PGOUSE-DAG: ![[JM6]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2}
> +// PGOUSE-DAG: ![[JM8]] = metadata !{metadata !"branch_weights", i32 11, i32 2}
> +// PGOUSE-DAG: ![[JM9]] = metadata !{metadata !"branch_weights", i32 10, i32 2}
> +
> +// PGOUSE-DAG: ![[SW1]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
> +// PGOUSE-DAG: ![[SW2]] = metadata !{metadata !"branch_weights", i32 6, i32 2, i32 3, i32 4, i32 5}
> +// PGOUSE-DAG: ![[SW3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[SW4]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
> +// PGOUSE-DAG: ![[SW5]] = metadata !{metadata !"branch_weights", i32 4, i32 1}
> +// PGOUSE-DAG: ![[SW6]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
> +// PGOUSE-DAG: ![[SW7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2, i32 2}
> +// PGOUSE-DAG: ![[SW8]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
> +// PGOUSE-DAG: ![[SW9]] = metadata !{metadata !"branch_weights", i32 2, i32 5}
> +
> +// PGOUSE-DAG: ![[BS1]] = metadata !{metadata !"branch_weights", i32 33, i32 2}
> +// PGOUSE-DAG: ![[BS2]] = metadata !{metadata !"branch_weights", i32 29, i32 2, i32 2, i32 2, i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[BS4]] = metadata !{metadata !"branch_weights", i32 2, i32 2}
> +// PGOUSE-DAG: ![[BS5]] = metadata !{metadata !"branch_weights", i32 12, i32 1}
> +// PGOUSE-DAG: ![[BS6]] = metadata !{metadata !"branch_weights", i32 12, i32 3}
> +// PGOUSE-DAG: ![[BS7]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS8]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
> +// PGOUSE-DAG: ![[BS9]] = metadata !{metadata !"branch_weights", i32 16, i32 14}
> +// PGOUSE-DAG: ![[BS10]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS11]] = metadata !{metadata !"branch_weights", i32 3, i32 1}
> +
> +int main(int argc, const char *argv[]) {
> +  simple_loops();
> +  conditionals();
> +  early_exits();
> +  jumps();
> +  switches();
> +  big_switch();
> +  no_usable_data();
> +  return 0;
> +}
>
> Added: cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata (added)
> +++ cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,16 @@
> +_Z6throwsv 11
> +1
> +100
> +0
> +0
> +100
> +66
> +33
> +17
> +50
> +33
> +100
> +
> +main 1
> +1
> +
>
> Added: cfe/trunk/test/CodeGenCXX/instr-profile.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/instr-profile.cpp?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGenCXX/instr-profile.cpp (added)
> +++ cfe/trunk/test/CodeGenCXX/instr-profile.cpp Mon Jan  6 16:27:43 2014
> @@ -0,0 +1,73 @@
> +// Test that instrumentation based profiling feeds branch prediction
> +// correctly. This tests both generation of profile data and use of the same,
> +// and the input file for the -fprofile-instr-use case is expected to be the
> +// result of running the program generated by the -fprofile-instr-generate
> +// case. As such, main() should call every function in this test.
> +
> +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
> +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN-EXC %s
> +
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE-EXC %s
> +
> +// PGOGEN: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
> +// PGOGEN-EXC: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
> +
> +// PGOGEN-LABEL: @_Z6throwsv()
> +// PGOUSE-LABEL: @_Z6throwsv()
> +// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 0
> +void throws() {
> +  // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 1
> +  // PGOUSE: br {{.*}} !prof ![[TH1:[0-9]+]]
> +  for (int i = 0; i < 100; ++i) {
> +    try {
> +      // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 5
> +      // PGOUSE: br {{.*}} !prof ![[TH2:[0-9]+]]
> +      if (i % 3) {
> +        // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 6
> +        // PGOUSE: br {{.*}} !prof ![[TH3:[0-9]+]]
> +        if (i < 50)
> +          throw 1;
> +      } else {
> +        // The catch block may be emitted after the throw above. We can skip it
> +        // by looking for an else block, but this will break if anyone puts an
> +        // else in the catch.
> +        // PGOUSE: if.else{{.*}}:
> +        // PGOGEN: if.else{{.*}}:
> +
> +        // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 7
> +        // PGOUSE: br {{.*}} !prof ![[TH4:[0-9]+]]
> +        if (i >= 50)
> +          throw 0;
> +      }
> +    } catch (int e) {
> +      // PGOUSE-EXC: catch{{.*}}:
> +      // PGOGEN-EXC: catch{{.*}}:
> +
> +      // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 8
> +      // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 9
> +      // PGOUSE-EXC: br {{.*}} !prof ![[TH5:[0-9]+]]
> +      if (e) {}
> +    }
> +    // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 4
> +
> +    // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 10
> +    // PGOUSE: br {{.*}} !prof ![[TH6:[0-9]+]]
> +    if (i < 100) {}
> +  }
> +
> +  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +  // PGOUSE: ret void
> +}
> +
> +// PGOUSE-DAG: ![[TH1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[TH2]] = metadata !{metadata !"branch_weights", i32 67, i32 35}
> +// PGOUSE-DAG: ![[TH3]] = metadata !{metadata !"branch_weights", i32 34, i32 34}
> +// PGOUSE-DAG: ![[TH4]] = metadata !{metadata !"branch_weights", i32 18, i32 18}
> +// PGOUSE-EXC: ![[TH5]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
> +// PGOUSE-DAG: ![[TH6]] = metadata !{metadata !"branch_weights", i32 101, i32 1}
> +
> +int main(int argc, const char *argv[]) {
> +  throws();
> +  return 0;
> +}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits



More information about the cfe-commits mailing list