r198640 - CodeGen: Initial instrumentation based PGO implementation
NAKAMURA Takumi
geek4civic at gmail.com
Mon Jan 6 17:08:01 PST 2014
Please don't match on basic block (bb) labels like "if.else". Labels may not be emitted in -Asserts builds.
I have suppressed the failure in r198651. Could you fix
CodeGenCXX/instr-profile.cpp?
2014/1/7 Justin Bogner <mail at justinbogner.com>:
> Author: bogner
> Date: Mon Jan 6 16:27:43 2014
> New Revision: 198640
>
> URL: http://llvm.org/viewvc/llvm-project?rev=198640&view=rev
> Log:
> CodeGen: Initial instrumentation based PGO implementation
>
> Added:
> cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
> cfe/trunk/lib/CodeGen/CodeGenPGO.h
> cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
> cfe/trunk/test/CodeGen/instr-profile.c
> cfe/trunk/test/CodeGenCXX/Inputs/
> cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
> cfe/trunk/test/CodeGenCXX/instr-profile.cpp
> Modified:
> cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
> cfe/trunk/lib/CodeGen/CGCall.cpp
> cfe/trunk/lib/CodeGen/CGException.cpp
> cfe/trunk/lib/CodeGen/CGExpr.cpp
> cfe/trunk/lib/CodeGen/CGExprAgg.cpp
> cfe/trunk/lib/CodeGen/CGExprComplex.cpp
> cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> cfe/trunk/lib/CodeGen/CGObjC.cpp
> cfe/trunk/lib/CodeGen/CGStmt.cpp
> cfe/trunk/lib/CodeGen/CMakeLists.txt
> cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
> cfe/trunk/lib/CodeGen/CodeGenFunction.h
> cfe/trunk/lib/CodeGen/CodeGenModule.cpp
> cfe/trunk/lib/CodeGen/CodeGenModule.h
>
> Modified: cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp Mon Jan 6 16:27:43 2014
> @@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCall
> llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
>
> CodeGenFunction::ConditionalEvaluation eval(CGF);
> - CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
> + CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
> + /*TrueCount=*/0);
>
> eval.begin(CGF);
> CGF.EmitBlock(ConfigOKBlock);
>
> Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jan 6 16:27:43 2014
> @@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntim
> call->setCallingConv(getRuntimeCC());
> Builder.CreateUnreachable();
> }
> + PGO.setCurrentRegionCount(0);
> }
>
> /// Emits a call or invoke instruction to the given nullary runtime
>
> Modified: cfe/trunk/lib/CodeGen/CGException.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGException.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGException.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGException.cpp Mon Jan 6 16:27:43 2014
> @@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(con
> // Initialize the catch variable and set up the cleanups.
> BeginCatch(*this, C);
>
> + // Emit the PGO counter increment
> + RegionCounter CatchCnt = getPGORegionCounter(C);
> + CatchCnt.beginRegion(Builder);
> +
> // Perform the body of the catch.
> EmitStmt(C->getHandlerBlock());
>
> @@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(con
> Builder.CreateBr(ContBB);
> }
>
> + RegionCounter ContCnt = getPGORegionCounter(&S);
> EmitBlock(ContBB);
> + ContCnt.beginRegion(Builder);
> }
>
> namespace {
>
> Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Mon Jan 6 16:27:43 2014
> @@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const Abst
> }
>
> OpaqueValueMapping binding(*this, expr);
> + RegionCounter Cnt = getPGORegionCounter(expr);
>
> const Expr *condExpr = expr->getCond();
> bool CondExprBool;
> @@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const Abst
> const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
> if (!CondExprBool) std::swap(live, dead);
>
> - if (!ContainsLabel(dead))
> + if (!ContainsLabel(dead)) {
> + // If the true case is live, we need to track its region
> + if (CondExprBool)
> + Cnt.beginRegion(Builder);
> return EmitLValue(live);
> + }
> }
>
> llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
> @@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const Abst
> llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
>
> ConditionalEvaluation eval(*this);
> - EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
> + EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
>
> // Any temporaries created here are conditional.
> EmitBlock(lhsBlock);
> + Cnt.beginRegion(Builder);
> eval.begin(*this);
> LValue lhs = EmitLValue(expr->getTrueExpr());
> eval.end(*this);
> + Cnt.adjustFallThroughCount();
>
> if (!lhs.isSimple())
> return EmitUnsupportedLValue(expr, "conditional operator");
> @@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const Abst
>
> // Any temporaries created here are conditional.
> EmitBlock(rhsBlock);
> + Cnt.beginElseRegion();
> eval.begin(*this);
> LValue rhs = EmitLValue(expr->getFalseExpr());
> eval.end(*this);
> + Cnt.adjustFallThroughCount();
> if (!rhs.isSimple())
> return EmitUnsupportedLValue(expr, "conditional operator");
> rhsBlock = Builder.GetInsertBlock();
>
> EmitBlock(contBlock);
> + Cnt.applyAdjustmentsToRegion();
>
> llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2,
> "cond-lvalue");
>
> Modified: cfe/trunk/lib/CodeGen/CGExprAgg.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprAgg.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprAgg.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprAgg.cpp Mon Jan 6 16:27:43 2014
> @@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const A
> // Bind the common expression if necessary.
> CodeGenFunction::OpaqueValueMapping binding(CGF, E);
>
> + RegionCounter Cnt = CGF.getPGORegionCounter(E);
> CodeGenFunction::ConditionalEvaluation eval(CGF);
> - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
> + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
>
> // Save whether the destination's lifetime is externally managed.
> bool isExternallyDestructed = Dest.isExternallyDestructed();
>
> eval.begin(CGF);
> CGF.EmitBlock(LHSBlock);
> + Cnt.beginRegion(Builder);
> Visit(E->getTrueExpr());
> + Cnt.adjustFallThroughCount();
> eval.end(CGF);
>
> assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
> @@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const A
>
> eval.begin(CGF);
> CGF.EmitBlock(RHSBlock);
> + Cnt.beginElseRegion();
> Visit(E->getFalseExpr());
> + Cnt.adjustFallThroughCount();
> eval.end(CGF);
>
> CGF.EmitBlock(ContBlock);
> + Cnt.applyAdjustmentsToRegion();
> }
>
> void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {
>
> Modified: cfe/trunk/lib/CodeGen/CGExprComplex.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprComplex.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprComplex.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprComplex.cpp Mon Jan 6 16:27:43 2014
> @@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const A
> // Bind the common expression if necessary.
> CodeGenFunction::OpaqueValueMapping binding(CGF, E);
>
> + RegionCounter Cnt = CGF.getPGORegionCounter(E);
> CodeGenFunction::ConditionalEvaluation eval(CGF);
> - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
> + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
>
> eval.begin(CGF);
> CGF.EmitBlock(LHSBlock);
> + Cnt.beginRegion(Builder);
> ComplexPairTy LHS = Visit(E->getTrueExpr());
> + Cnt.adjustFallThroughCount();
> LHSBlock = Builder.GetInsertBlock();
> CGF.EmitBranch(ContBlock);
> eval.end(CGF);
>
> eval.begin(CGF);
> CGF.EmitBlock(RHSBlock);
> + Cnt.beginElseRegion();
> ComplexPairTy RHS = Visit(E->getFalseExpr());
> + Cnt.adjustFallThroughCount();
> RHSBlock = Builder.GetInsertBlock();
> CGF.EmitBlock(ContBlock);
> eval.end(CGF);
> + Cnt.applyAdjustmentsToRegion();
>
> // Create a PHI node for the real part.
> llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r");
>
> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Mon Jan 6 16:27:43 2014
> @@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign
> }
>
> Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
> + RegionCounter Cnt = CGF.getPGORegionCounter(E);
> +
> // Perform vector logical and on comparisons with zero vectors.
> if (E->getType()->isVectorType()) {
> + Cnt.beginRegion(Builder);
> +
> Value *LHS = Visit(E->getLHS());
> Value *RHS = Visit(E->getRHS());
> Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
> @@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
> bool LHSCondVal;
> if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
> if (LHSCondVal) { // If we have 1 && X, just emit X.
> + Cnt.beginRegion(Builder);
> +
> Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> // ZExt result to int or bool.
> return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
> @@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
> CodeGenFunction::ConditionalEvaluation eval(CGF);
>
> // Branch on the LHS first. If it is false, go to the failure (cont) block.
> - CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
> + CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
>
> // Any edges into the ContBlock are now from an (indeterminate number of)
> // edges from this first condition. All of these values will be false. Start
> @@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
>
> eval.begin(CGF);
> CGF.EmitBlock(RHSBlock);
> + Cnt.beginRegion(Builder);
> Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> + Cnt.adjustFallThroughCount();
> eval.end(CGF);
>
> // Reaquire the RHS block, as there may be subblocks inserted.
> @@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
> Builder.SetCurrentDebugLocation(llvm::DebugLoc());
> CGF.EmitBlock(ContBlock);
> PN->addIncoming(RHSCond, RHSBlock);
> + Cnt.applyAdjustmentsToRegion();
>
> // ZExt result to int.
> return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
> }
>
> Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
> + RegionCounter Cnt = CGF.getPGORegionCounter(E);
> +
> // Perform vector logical or on comparisons with zero vectors.
> if (E->getType()->isVectorType()) {
> + Cnt.beginRegion(Builder);
> +
> Value *LHS = Visit(E->getLHS());
> Value *RHS = Visit(E->getRHS());
> Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
> @@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
> bool LHSCondVal;
> if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
> if (!LHSCondVal) { // If we have 0 || X, just emit X.
> + Cnt.beginRegion(Builder);
> +
> Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> // ZExt result to int or bool.
> return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
> @@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
> CodeGenFunction::ConditionalEvaluation eval(CGF);
>
> // Branch on the LHS first. If it is true, go to the success (cont) block.
> - CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
> + CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
> + Cnt.getParentCount() - Cnt.getCount());
>
> // Any edges into the ContBlock are now from an (indeterminate number of)
> // edges from this first condition. All of these values will be true. Start
> @@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(co
>
> // Emit the RHS condition as a bool value.
> CGF.EmitBlock(RHSBlock);
> + Cnt.beginRegion(Builder);
> Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
> + Cnt.adjustFallThroughCount();
>
> eval.end(CGF);
>
> @@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(co
> // into the phi node for the edge with the value of RHSCond.
> CGF.EmitBlock(ContBlock);
> PN->addIncoming(RHSCond, RHSBlock);
> + Cnt.applyAdjustmentsToRegion();
>
> // ZExt result to int.
> return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext");
> @@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const A
>
> // Bind the common expression if necessary.
> CodeGenFunction::OpaqueValueMapping binding(CGF, E);
> + RegionCounter Cnt = CGF.getPGORegionCounter(E);
>
> Expr *condExpr = E->getCond();
> Expr *lhsExpr = E->getTrueExpr();
> @@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const A
>
> // If the dead side doesn't have labels we need, just emit the Live part.
> if (!CGF.ContainsLabel(dead)) {
> + if (CondExprBool)
> + Cnt.beginRegion(Builder);
> Value *Result = Visit(live);
>
> // If the live part is a throw expression, it acts like it has a void
> @@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const A
> // the select function.
> if (CGF.getLangOpts().OpenCL
> && condExpr->getType()->isVectorType()) {
> + Cnt.beginRegion(Builder);
> +
> llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
> llvm::Value *LHS = Visit(lhsExpr);
> llvm::Value *RHS = Visit(rhsExpr);
> @@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const A
> // safe to evaluate the LHS and RHS unconditionally.
> if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
> isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
> + Cnt.beginRegion(Builder);
> +
> llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
> llvm::Value *LHS = Visit(lhsExpr);
> llvm::Value *RHS = Visit(rhsExpr);
> @@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const A
> llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
>
> CodeGenFunction::ConditionalEvaluation eval(CGF);
> - CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
> + CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
>
> CGF.EmitBlock(LHSBlock);
> + Cnt.beginRegion(Builder);
> eval.begin(CGF);
> Value *LHS = Visit(lhsExpr);
> eval.end(CGF);
> + Cnt.adjustFallThroughCount();
>
> LHSBlock = Builder.GetInsertBlock();
> Builder.CreateBr(ContBlock);
>
> CGF.EmitBlock(RHSBlock);
> + Cnt.beginElseRegion();
> eval.begin(CGF);
> Value *RHS = Visit(rhsExpr);
> eval.end(CGF);
> + Cnt.adjustFallThroughCount();
>
> RHSBlock = Builder.GetInsertBlock();
> CGF.EmitBlock(ContBlock);
> + Cnt.applyAdjustmentsToRegion();
>
> // If the LHS or RHS is a throw expression, it will be legitimately null.
> if (!LHS)
>
> Modified: cfe/trunk/lib/CodeGen/CGObjC.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGObjC.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGObjC.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGObjC.cpp Mon Jan 6 16:27:43 2014
> @@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollect
> llvm::Value *initialMutations =
> Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations");
>
> + RegionCounter Cnt = getPGORegionCounter(&S);
> +
> // Start looping. This is the point we return to whenever we have a
> // fresh, non-empty batch of objects.
> llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody");
> EmitBlock(LoopBodyBB);
> + Cnt.beginRegion(Builder);
>
> // The current index into the buffer.
> llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index");
> @@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollect
> EmitAutoVarCleanups(variable);
>
> // Perform the loop body, setting up break and continue labels.
> - BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody));
> + BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt));
> {
> RunCleanupsScope Scope(*this);
> EmitStmt(S.getBody());
> @@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollect
> llvm::Value *indexPlusOne
> = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
>
> + // TODO: We should probably model this as a "continue" for PGO
> // If we haven't overrun the buffer yet, we can continue.
> Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
> LoopBodyBB, FetchMoreBB);
> @@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollect
> index->addIncoming(zero, Builder.GetInsertBlock());
> count->addIncoming(refetchCount, Builder.GetInsertBlock());
>
> + // TODO: We should be applying PGO weights here, but this needs to handle the
> + // branch before FetchMoreBB or we risk getting the numbers wrong.
> Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero),
> EmptyBB, LoopBodyBB);
>
> @@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollect
> PopCleanupBlock();
>
> EmitBlock(LoopEnd.getBlock());
> + // TODO: Once we calculate PGO weights above, set the region count here
> }
>
> void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) {
>
> Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGStmt.cpp Mon Jan 6 16:27:43 2014
> @@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const La
> ResolveBranchFixups(Dest.getBlock());
> }
>
> + RegionCounter Cnt = getPGORegionCounter(D->getStmt());
> EmitBlock(Dest.getBlock());
> + Cnt.beginRegion(Builder);
> }
>
> /// Change the cleanup scope of the labels in this lexical scope to
> @@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const
> EmitStopPoint(&S);
>
> EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
> + PGO.setCurrentRegionCount(0);
> }
>
>
> void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
> if (const LabelDecl *Target = S.getConstantTarget()) {
> EmitBranchThroughCleanup(getJumpDestForLabel(Target));
> + PGO.setCurrentRegionCount(0);
> return;
> }
>
> @@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoSt
> cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
>
> EmitBranch(IndGotoBB);
> + PGO.setCurrentRegionCount(0);
> }
>
> void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
> // C99 6.8.4.1: The first substatement is executed if the expression compares
> // unequal to 0. The condition must be a scalar type.
> LexicalScope ConditionScope(*this, S.getSourceRange());
> + RegionCounter Cnt = getPGORegionCounter(&S);
>
> if (S.getConditionVariable())
> EmitAutoVarDecl(*S.getConditionVariable());
> @@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const I
> // If the skipped block has no labels in it, just emit the executed block.
> // This avoids emitting dead code and simplifies the CFG substantially.
> if (!ContainsLabel(Skipped)) {
> + if (CondConstant)
> + Cnt.beginRegion(Builder);
> if (Executed) {
> RunCleanupsScope ExecutedScope(*this);
> EmitStmt(Executed);
> @@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const I
> llvm::BasicBlock *ElseBlock = ContBlock;
> if (S.getElse())
> ElseBlock = createBasicBlock("if.else");
> - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
> +
> + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
>
> // Emit the 'then' code.
> - EmitBlock(ThenBlock);
> + EmitBlock(ThenBlock);
> + Cnt.beginRegion(Builder);
> {
> RunCleanupsScope ThenScope(*this);
> EmitStmt(S.getThen());
> }
> + Cnt.adjustFallThroughCount();
> EmitBranch(ContBlock);
>
> // Emit the 'else' code if present.
> @@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const I
> if (getDebugInfo())
> Builder.SetCurrentDebugLocation(llvm::DebugLoc());
> EmitBlock(ElseBlock);
> + Cnt.beginElseRegion();
> {
> RunCleanupsScope ElseScope(*this);
> EmitStmt(Else);
> }
> + Cnt.adjustFallThroughCount();
> // There is no need to emit line number for unconditional branch.
> if (getDebugInfo())
> Builder.SetCurrentDebugLocation(llvm::DebugLoc());
> @@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const I
>
> // Emit the continuation block for code after the if.
> EmitBlock(ContBlock, true);
> + Cnt.applyAdjustmentsToRegion();
> }
>
> void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
> + RegionCounter Cnt = getPGORegionCounter(&S);
> +
> // Emit the header for the loop, which will also become
> // the continue target.
> JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
> @@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(cons
> JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
>
> // Store the blocks to use for break and continue.
> - BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
> + BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt));
>
> // C++ [stmt.while]p2:
> // When the condition of a while statement is a declaration, the
> @@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(cons
> // while(1) is common, avoid extra exit blocks. Be sure
> // to correctly handle break/continue though.
> bool EmitBoolCondBranch = true;
> + llvm::BranchInst *CondBr = NULL;
> if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
> if (C->isOne())
> EmitBoolCondBranch = false;
> @@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(cons
> if (ConditionScope.requiresCleanups())
> ExitBlock = createBasicBlock("while.exit");
>
> - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
> + CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
>
> if (ExitBlock != LoopExit.getBlock()) {
> EmitBlock(ExitBlock);
> @@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(cons
> {
> RunCleanupsScope BodyScope(*this);
> EmitBlock(LoopBody);
> + Cnt.beginRegion(Builder);
> EmitStmt(S.getBody());
> }
> + Cnt.adjustFallThroughCount();
>
> BreakContinueStack.pop_back();
>
> + uint64_t LoopCount = Cnt.getCount();
> + uint64_t ExitCount = Cnt.getLoopExitCount();
> + if (EmitBoolCondBranch)
> + CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> + PGO.createBranchWeights(LoopCount, ExitCount));
> +
> // Immediately force cleanup.
> ConditionScope.ForceCleanup();
>
> @@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(cons
>
> // Emit the exit block.
> EmitBlock(LoopExit.getBlock(), true);
> + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>
> // The LoopHeader typically is just a branch if we skipped emitting
> // a branch, try to erase it.
> @@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const D
> JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
> JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
>
> + RegionCounter Cnt = getPGORegionCounter(&S);
> +
> // Store the blocks to use for break and continue.
> - BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
> + BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt));
>
> // Emit the body of the loop.
> llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
> EmitBlock(LoopBody);
> + Cnt.beginRegion(Builder);
> {
> RunCleanupsScope BodyScope(*this);
> EmitStmt(S.getBody());
> }
> + Cnt.adjustFallThroughCount();
>
> BreakContinueStack.pop_back();
>
> @@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const D
> if (C->isZero())
> EmitBoolCondBranch = false;
>
> + uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount();
> + uint64_t ExitCount = Cnt.getLoopExitCount();
> +
> // As long as the condition is true, iterate the loop.
> - if (EmitBoolCondBranch)
> - Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
> + if (EmitBoolCondBranch) {
> + Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
> + PGO.createBranchWeights(LoopCount, ExitCount));
> + }
>
> // Emit the exit block.
> EmitBlock(LoopExit.getBlock());
> + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
>
> // The DoCond block typically is just a branch if we skipped
> // emitting a branch, try to erase it.
> @@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const D
> }
>
> void CodeGenFunction::EmitForStmt(const ForStmt &S) {
> + RegionCounter Cnt = getPGORegionCounter(&S);
> +
> JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
>
> RunCleanupsScope ForScope(*this);
> @@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const
> // Create a cleanup scope for the condition variable cleanups.
> RunCleanupsScope ConditionScope(*this);
>
> + llvm::BranchInst *CondBr = NULL;
> if (S.getCond()) {
> // If the for statement has a condition scope, emit the local variable
> // declaration.
> @@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const
> // C99 6.8.5p2/p4: The first substatement is executed if the expression
> // compares unequal to 0. The condition must be a scalar type.
> llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
> - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
> + CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
>
> if (ExitBlock != LoopExit.getBlock()) {
> EmitBlock(ExitBlock);
> @@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const
> // Treat it as a non-zero constant. Don't even create a new block for the
> // body, just fall into it.
> }
> + Cnt.beginRegion(Builder);
>
> // If the for loop doesn't have an increment we can just use the
> // condition as the continue block. Otherwise we'll need to create
> @@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const
> Continue = getJumpDestInCurrentScope("for.inc");
>
> // Store the blocks to use for break and continue.
> - BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
> + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
>
> {
> // Create a separate cleanup scope for the body, in case it is not
> @@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const
> EmitBlock(Continue.getBlock());
> EmitStmt(S.getInc());
> }
> + Cnt.adjustFallThroughCount();
>
> BreakContinueStack.pop_back();
>
> + uint64_t LoopCount = Cnt.getCount();
> + uint64_t ExitCount = Cnt.getLoopExitCount();
> + if (S.getCond())
> + CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> + PGO.createBranchWeights(LoopCount, ExitCount));
> +
> ConditionScope.ForceCleanup();
> EmitBranch(CondBlock);
>
> @@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const
>
> // Emit the fall-through block.
> EmitBlock(LoopExit.getBlock(), true);
> + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
> }
>
> void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
> + RegionCounter Cnt = getPGORegionCounter(&S);
> +
> JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
>
> RunCleanupsScope ForScope(*this);
> @@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStm
> // The body is executed if the expression, contextually converted
> // to bool, is true.
> llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
> - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
> + llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal,
> + ForBody, ExitBlock);
>
> if (ExitBlock != LoopExit.getBlock()) {
> EmitBlock(ExitBlock);
> @@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStm
> }
>
> EmitBlock(ForBody);
> + Cnt.beginRegion(Builder);
>
> // Create a block for the increment. In case of a 'continue', we jump there.
> JumpDest Continue = getJumpDestInCurrentScope("for.inc");
>
> // Store the blocks to use for break and continue.
> - BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
> + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
>
> {
> // Create a separate cleanup scope for the loop variable and body.
> @@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStm
> // If there is an increment, emit it next.
> EmitBlock(Continue.getBlock());
> EmitStmt(S.getInc());
> + Cnt.adjustFallThroughCount();
>
> BreakContinueStack.pop_back();
>
> + uint64_t LoopCount = Cnt.getCount();
> + uint64_t ExitCount = Cnt.getLoopExitCount();
> + CondBr->setMetadata(llvm::LLVMContext::MD_prof,
> + PGO.createBranchWeights(LoopCount, ExitCount));
> +
> EmitBranch(CondBlock);
>
> ForScope.ForceCleanup();
> @@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStm
>
> // Emit the fall-through block.
> EmitBlock(LoopExit.getBlock(), true);
> + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
> }
>
> void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
> @@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue
> /*init*/ true);
> }
> EmitBranchThroughCleanup(ReturnBlock);
> + PGO.setCurrentRegionCount(0);
> }
>
> /// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
> @@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(con
>
> cleanupScope.ForceCleanup();
> EmitBranchThroughCleanup(ReturnBlock);
> + PGO.setCurrentRegionCount(0);
> }
>
> void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
> @@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(cons
> if (HaveInsertPoint())
> EmitStopPoint(&S);
>
> - JumpDest Block = BreakContinueStack.back().BreakBlock;
> - EmitBranchThroughCleanup(Block);
> + BreakContinue &BC = BreakContinueStack.back();
> + // We keep track of breaks from the loop so we can differentiate them from
> + // non-local exits in PGO instrumentation. This only applies to loops, not
> + // breaks from switch statements.
> + if (BC.CountBreak)
> + BC.LoopCnt->getBreakCounter().beginRegion(Builder);
> + EmitBranchThroughCleanup(BC.BreakBlock);
> + PGO.setCurrentRegionCount(0);
> }
>
> void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
> @@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(c
> if (HaveInsertPoint())
> EmitStopPoint(&S);
>
> - JumpDest Block = BreakContinueStack.back().ContinueBlock;
> - EmitBranchThroughCleanup(Block);
> + BreakContinue &BC = BreakContinueStack.back();
> + // We keep track of continues in the loop so we can differentiate them from
> + // non-local exits in PGO instrumentation.
> + BC.LoopCnt->getContinueCounter().beginRegion(Builder);
> + EmitBranchThroughCleanup(BC.ContinueBlock);
> + PGO.setCurrentRegionCount(0);
> }
>
> /// EmitCaseStmtRange - If case statement range is not too big then
> @@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(
> llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
> llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
>
> + RegionCounter CaseCnt = getPGORegionCounter(&S);
> +
> // Emit the code for this case. We do this first to make sure it is
> // properly chained from our predecessor before generating the
> // switch machinery to enter this block.
> EmitBlock(createBasicBlock("sw.bb"));
> llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
> + CaseCnt.beginRegion(Builder);
> EmitStmt(S.getSubStmt());
>
> // If range is empty, do nothing.
> @@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(
> // FIXME: parameters such as this should not be hardcoded.
> if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
> // Range is small enough to add multiple switch instruction cases.
> - for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
> + uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount();
> + unsigned NCases = Range.getZExtValue() + 1;
> + // Divide the weights evenly between the cases, ensuring that the total
> + // weight is preserved. I.e., a weight of 5 over three cases will be
> + // distributed as weights of 2, 2, and 1.
> + uint64_t Weight = Total / NCases, Rem = Total % NCases;
> + for (unsigned I = 0; I != NCases; ++I) {
> + if (SwitchWeights)
> + SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
> + if (Rem)
> + Rem--;
> SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
> LHS++;
> }
> @@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(
> Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
> llvm::Value *Cond =
> Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
> - Builder.CreateCondBr(Cond, CaseDest, FalseDest);
> +
> + llvm::MDNode *Weights = 0;
> + if (SwitchWeights) {
> + uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount();
> + uint64_t DefaultCount = (*SwitchWeights)[0];
> + Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
> +
> + // Since we're chaining the switch default through each large case range, we
> + // need to update the weight for the default, i.e., the first case, to include
> + // this case.
> + (*SwitchWeights)[0] += ThisCount;
> + }
> + Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
>
> // Restore the appropriate insertion point.
> if (RestoreBB)
> @@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const
> return;
> }
>
> + RegionCounter CaseCnt = getPGORegionCounter(&S);
> llvm::ConstantInt *CaseVal =
> Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
>
> - // If the body of the case is just a 'break', and if there was no fallthrough,
> - // try to not emit an empty block.
> - if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
> + // If the body of the case is just a 'break', try to not emit an empty block.
> + // If we're profiling or we're not optimizing, leave the block in for better
> + // debug and coverage analysis.
> + if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
> + CGM.getCodeGenOpts().OptimizationLevel > 0 &&
> isa<BreakStmt>(S.getSubStmt())) {
> JumpDest Block = BreakContinueStack.back().BreakBlock;
>
> // Only do this optimization if there are no cleanups that need emitting.
> if (isObviouslyBranchWithoutCleanups(Block)) {
> + if (SwitchWeights)
> + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
> SwitchInsn->addCase(CaseVal, Block.getBlock());
>
> // If there was a fallthrough into this case, make sure to redirect it to
> @@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const
>
> EmitBlock(createBasicBlock("sw.bb"));
> llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
> + if (SwitchWeights)
> + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
> + CaseCnt.beginRegion(Builder);
> SwitchInsn->addCase(CaseVal, CaseDest);
>
> // Recursively emitting the statement is acceptable, but is not wonderful for
> @@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const
> // Otherwise, iteratively add consecutive cases to this switch stmt.
> while (NextCase && NextCase->getRHS() == 0) {
> CurCase = NextCase;
> - llvm::ConstantInt *CaseVal =
> + llvm::ConstantInt *CaseVal =
> Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
> +
> + CaseCnt = getPGORegionCounter(NextCase);
> + if (SwitchWeights)
> + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
> + CaseCnt.beginRegion(Builder);
> +
> SwitchInsn->addCase(CaseVal, CaseDest);
> NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
> }
> @@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(co
> llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
> assert(DefaultBlock->empty() &&
> "EmitDefaultStmt: Default block already defined?");
> +
> + llvm::BasicBlock *SkipCountBB = 0;
> + if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
> + // The PGO region here needs to count the number of times the edge occurs,
> + // so fallthrough into this case will jump past the region counter to the
> + // skipcount basic block.
> + SkipCountBB = createBasicBlock("skipcount");
> + EmitBranch(SkipCountBB);
> + }
> EmitBlock(DefaultBlock);
> +
> + RegionCounter Cnt = getPGORegionCounter(&S);
> + Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
> +
> + if (SkipCountBB)
> + EmitBlock(SkipCountBB);
> EmitStmt(S.getSubStmt());
> }
>
> @@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForC
> static bool FindCaseStatementsForValue(const SwitchStmt &S,
> const llvm::APSInt &ConstantCondValue,
> SmallVectorImpl<const Stmt*> &ResultStmts,
> - ASTContext &C) {
> + ASTContext &C,
> + const SwitchCase *&ResultCase) {
> // First step, find the switch case that is being branched to. We can do this
> // efficiently by scanning the SwitchCase list.
> const SwitchCase *Case = S.getSwitchCaseList();
> @@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(c
> // while (1) {
> // case 4: ...
> bool FoundCase = false;
> + ResultCase = Case;
> return CollectStatementsForCase(S.getBody(), Case, FoundCase,
> ResultStmts) != CSFC_Failure &&
> FoundCase;
> @@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(con
>
> // Handle nested switch statements.
> llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
> + SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
> llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
>
> // See if we can constant fold the condition of the switch and therefore only
> @@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(con
> llvm::APSInt ConstantCondValue;
> if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
> SmallVector<const Stmt*, 4> CaseStmts;
> + const SwitchCase *Case = 0;
> if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
> - getContext())) {
> + getContext(), Case)) {
> + PGO.setCurrentRegionCount(0);
> + if (Case) {
> + RegionCounter CaseCnt = getPGORegionCounter(Case);
> + CaseCnt.beginRegion(Builder);
> + }
> RunCleanupsScope ExecutedScope(*this);
>
> // At this point, we are no longer "within" a switch instance, so
> @@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(con
> // specified series of statements and we're good.
> for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
> EmitStmt(CaseStmts[i]);
> + RegionCounter ExitCnt = getPGORegionCounter(&S);
> + ExitCnt.beginRegion(Builder);
>
> // Now we want to restore the saved switch instance so that nested
> // switches continue to function properly
> @@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(con
> // failure.
> llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
> SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
> + if (PGO.haveRegionCounts()) {
> + // Walk the SwitchCase list to find how many there are.
> + uint64_t DefaultCount = 0;
> + unsigned NumCases = 0;
> + for (const SwitchCase *Case = S.getSwitchCaseList();
> + Case;
> + Case = Case->getNextSwitchCase()) {
> + if (isa<DefaultStmt>(Case))
> + DefaultCount = getPGORegionCounter(Case).getCount();
> + NumCases += 1;
> + }
> + SwitchWeights = new SmallVector<uint64_t, 16>();
> + SwitchWeights->reserve(NumCases);
> + // The default needs to be first. We store the edge count, so we already
> + // know the right weight.
> + SwitchWeights->push_back(DefaultCount);
> + }
> CaseRangeBlock = DefaultBlock;
>
> // Clear the insertion point to indicate we are in unreachable code.
> Builder.ClearInsertionPoint();
> + PGO.setCurrentRegionCount(0);
>
> // All break statements jump to NextBlock. If BreakContinueStack is non-empty
> - // then reuse last ContinueBlock.
> + // then reuse last ContinueBlock and that block's counter.
> JumpDest OuterContinue;
> - if (!BreakContinueStack.empty())
> - OuterContinue = BreakContinueStack.back().ContinueBlock;
> + RegionCounter *OuterCount = 0;
> + if (!BreakContinueStack.empty()) {
> + BreakContinue &BC = BreakContinueStack.back();
> + OuterContinue = BC.ContinueBlock;
> + OuterCount = BC.LoopCnt;
> + }
>
> - BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
> + BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue,
> + OuterCount, /*CountBreak=*/false));
>
> // Emit switch body.
> EmitStmt(S.getBody());
> @@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(con
>
> // Emit continuation.
> EmitBlock(SwitchExit.getBlock(), true);
> + RegionCounter ExitCnt = getPGORegionCounter(&S);
> + ExitCnt.beginRegion(Builder);
>
> + if (SwitchWeights) {
> + assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
> + "switch weights do not match switch cases");
> + // If there's only one jump destination there's no sense weighting it.
> + if (SwitchWeights->size() > 1)
> + SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
> + PGO.createBranchWeights(*SwitchWeights));
> + delete SwitchWeights;
> + }
> SwitchInsn = SavedSwitchInsn;
> + SwitchWeights = SavedSwitchWeights;
> CaseRangeBlock = SavedCRBlock;
> }
>
>
> Modified: cfe/trunk/lib/CodeGen/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CMakeLists.txt?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CMakeLists.txt (original)
> +++ cfe/trunk/lib/CodeGen/CMakeLists.txt Mon Jan 6 16:27:43 2014
> @@ -53,6 +53,7 @@ add_clang_library(clangCodeGen
> CodeGenModule.cpp
> CodeGenTBAA.cpp
> CodeGenTypes.cpp
> + CodeGenPGO.cpp
> ItaniumCXXABI.cpp
> MicrosoftCXXABI.cpp
> ModuleBuilder.cpp
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp Mon Jan 6 16:27:43 2014
> @@ -16,6 +16,7 @@
> #include "CGCXXABI.h"
> #include "CGDebugInfo.h"
> #include "CodeGenModule.h"
> +#include "CodeGenPGO.h"
> #include "TargetInfo.h"
> #include "clang/AST/ASTContext.h"
> #include "clang/AST/Decl.h"
> @@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGen
> NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
> ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
> DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
> - SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
> + PGO(cgm), SwitchInsn(0), SwitchWeights(0),
> + CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
> NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
> CXXThisValue(0), CXXDefaultInitExprThis(0),
> CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
> @@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(Glob
> if (CGM.getCodeGenOpts().InstrumentForProfiling)
> EmitMCountInstrumentation();
>
> + PGO.assignRegionCounters(GD);
> +
> if (RetTy->isVoidType()) {
> // Void type; nothing to return.
> ReturnValue = 0;
> @@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(Glob
>
> void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
> const Stmt *Body) {
> + RegionCounter Cnt = getPGORegionCounter(Body);
> + Cnt.beginRegion(Builder);
> if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
> EmitCompoundStmtWithoutScope(*S);
> else
> @@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(Globa
> // a quick pass now to see if we can.
> if (!CurFn->doesNotThrow())
> TryMarkNoThrow(CurFn);
> +
> + PGO.emitWriteoutFunction(CurGD);
> + PGO.destroyRegionCounters();
> }
>
> /// ContainsLabel - Return true if the statement contains a label in it. If
> @@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr
> ///
> void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
> llvm::BasicBlock *TrueBlock,
> - llvm::BasicBlock *FalseBlock) {
> + llvm::BasicBlock *FalseBlock,
> + uint64_t TrueCount) {
> Cond = Cond->IgnoreParens();
>
> if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
> + RegionCounter Cnt = getPGORegionCounter(CondBOp);
> +
> // Handle X && Y in a condition.
> if (CondBOp->getOpcode() == BO_LAnd) {
> // If we have "1 && X", simplify the code. "0 && X" would have constant
> @@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
> if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
> ConstantBool) {
> // br(1 && X) -> br(X).
> - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> + Cnt.beginRegion(Builder);
> + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
> + TrueCount);
> }
>
> // If we have "X && 1", simplify the code to use an uncond branch.
> @@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolEx
> if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
> ConstantBool) {
> // br(X && 1) -> br(X).
> - return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
> + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
> + TrueCount);
> }
>
> // Emit the LHS as a conditional. If the LHS conditional is false, we
> // want to jump to the FalseBlock.
> llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
> + // The counter tells us how often we evaluate RHS, and all of TrueCount
> + // can be propagated to that branch.
> + uint64_t RHSCount = Cnt.getCount();
>
> ConditionalEvaluation eval(*this);
> - EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
> + EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
> EmitBlock(LHSTrue);
>
> // Any temporaries created here are conditional.
> + Cnt.beginRegion(Builder);
> eval.begin(*this);
> - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
> eval.end(*this);
> + Cnt.adjustFallThroughCount();
> + Cnt.applyAdjustmentsToRegion();
>
> return;
> }
> @@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
> if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
> !ConstantBool) {
> // br(0 || X) -> br(X).
> - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> + Cnt.beginRegion(Builder);
> + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
> + TrueCount);
> }
>
> // If we have "X || 0", simplify the code to use an uncond branch.
> @@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolEx
> if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
> !ConstantBool) {
> // br(X || 0) -> br(X).
> - return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
> + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
> + TrueCount);
> }
>
> // Emit the LHS as a conditional. If the LHS conditional is true, we
> // want to jump to the TrueBlock.
> llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
> + // We have the count for entry to the RHS and for the whole expression
> + // being true, so we can divvy up TrueCount between the short circuit and
> + // the RHS.
> + uint64_t LHSCount = TrueCount - Cnt.getCount();
> + uint64_t RHSCount = TrueCount - LHSCount;
>
> ConditionalEvaluation eval(*this);
> - EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
> + EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
> EmitBlock(LHSFalse);
>
> // Any temporaries created here are conditional.
> + Cnt.beginRegion(Builder);
> eval.begin(*this);
> - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
> + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
> +
> eval.end(*this);
> + Cnt.adjustFallThroughCount();
> + Cnt.applyAdjustmentsToRegion();
>
> return;
> }
> @@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolEx
>
> if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
> // br(!x, t, f) -> br(x, f, t)
> - if (CondUOp->getOpcode() == UO_LNot)
> - return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
> + if (CondUOp->getOpcode() == UO_LNot) {
> + // Negate the count.
> + uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
> + // Negate the condition and swap the destination blocks.
> + return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
> + FalseCount);
> + }
> }
>
> if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
> @@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolEx
> llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
> llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
>
> + RegionCounter Cnt = getPGORegionCounter(CondOp);
> ConditionalEvaluation cond(*this);
> - EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
> + EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
> +
> + // When computing PGO branch weights, we only know the overall count for
> + // the true block. This code is essentially doing tail duplication of the
> + // naive code-gen, introducing new edges for which counts are not
> + // available. Divide the counts proportionally between the LHS and RHS of
> + // the conditional operator.
> + uint64_t LHSScaledTrueCount = 0;
> + if (TrueCount) {
> + double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount();
> + LHSScaledTrueCount = TrueCount * LHSRatio;
> + }
>
> cond.begin(*this);
> EmitBlock(LHSBlock);
> - EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
> + Cnt.beginRegion(Builder);
> + EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
> + LHSScaledTrueCount);
> cond.end(*this);
>
> cond.begin(*this);
> EmitBlock(RHSBlock);
> - EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
> + Cnt.beginElseRegion();
> + EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
> + TrueCount - LHSScaledTrueCount);
> cond.end(*this);
>
> return;
> @@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolEx
> return;
> }
>
> + // Create branch weights based on the number of times we get here and the
> + // number of times the condition should be true.
> + uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount);
> + llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
> + CurrentCount - TrueCount);
> +
> // Emit the code with the fully general case.
> llvm::Value *CondV = EvaluateExprAsBool(Cond);
> - Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
> + Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
> }
>
> /// ErrorUnsupported - Print out an error that codegen doesn't support the
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Jan 6 16:27:43 2014
> @@ -19,6 +19,7 @@
> #include "CGValue.h"
> #include "EHScopeStack.h"
> #include "CodeGenModule.h"
> +#include "CodeGenPGO.h"
> #include "clang/AST/CharUnits.h"
> #include "clang/AST/ExprCXX.h"
> #include "clang/AST/ExprObjC.h"
> @@ -817,19 +818,36 @@ private:
> llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
>
> // BreakContinueStack - This keeps track of where break and continue
> - // statements should jump to.
> + // statements should jump to and the associated base counter for
> + // instrumentation.
> struct BreakContinue {
> - BreakContinue(JumpDest Break, JumpDest Continue)
> - : BreakBlock(Break), ContinueBlock(Continue) {}
> + BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt,
> + bool CountBreak = true)
> + : BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt),
> + CountBreak(CountBreak) {}
>
> JumpDest BreakBlock;
> JumpDest ContinueBlock;
> + RegionCounter *LoopCnt;
> + bool CountBreak;
> };
> SmallVector<BreakContinue, 8> BreakContinueStack;
>
> + CodeGenPGO PGO;
> +
> +public:
> + /// Get a counter for instrumentation of the region associated with the given
> + /// statement.
> + RegionCounter getPGORegionCounter(const Stmt *S) {
> + return RegionCounter(PGO, S);
> + }
> +private:
> +
> /// SwitchInsn - This is nearest current switch instruction. It is null if
> /// current context is not in a switch.
> llvm::SwitchInst *SwitchInsn;
> + /// The branch weights of SwitchInsn when doing instrumentation based PGO.
> + SmallVector<uint64_t, 16> *SwitchWeights;
>
> /// CaseRangeBlock - This block holds if condition check for last case
> /// statement range in current switch instruction.
> @@ -2413,8 +2431,10 @@ public:
> /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
> /// if statement) to the specified blocks. Based on the condition, this might
> /// try to simplify the codegen of the conditional based on the branch.
> + /// TrueCount should be the number of times we expect the condition to
> + /// evaluate to true based on PGO data.
> void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
> - llvm::BasicBlock *FalseBlock);
> + llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
>
> /// \brief Emit a description of a type in a format suitable for passing to
> /// a runtime sanitizer handler.
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jan 6 16:27:43 2014
> @@ -20,6 +20,7 @@
> #include "CGOpenCLRuntime.h"
> #include "CodeGenFunction.h"
> #include "CodeGenTBAA.h"
> +#include "CodeGenPGO.h"
> #include "TargetInfo.h"
> #include "clang/AST/ASTContext.h"
> #include "clang/AST/CharUnits.h"
> @@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext
> ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
> TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
> OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
> - NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
> + NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
> + CFConstantStringClassRef(0),
> ConstantStringClassRef(0), NSConstantStringType(0),
> NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
> BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
> @@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext
> if (C.getLangOpts().ObjCAutoRefCount)
> ARCData = new ARCEntrypoints();
> RRData = new RREntrypoints();
> +
> + if (!CodeGenOpts.InstrProfileInput.empty())
> + PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
> }
>
> CodeGenModule::~CodeGenModule() {
> @@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDe
> AddGlobalDtor(Fn, DA->getPriority());
> if (D->hasAttr<AnnotateAttr>())
> AddGlobalAnnotations(D, Fn);
> +
> + llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
> + if (PGOInit)
> + AddGlobalCtor(PGOInit, 0);
> }
>
> void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=198640&r1=198639&r2=198640&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenModule.h Mon Jan 6 16:27:43 2014
> @@ -85,7 +85,8 @@ namespace CodeGen {
> class CGCUDARuntime;
> class BlockFieldFlags;
> class FunctionArgList;
> -
> + class PGOProfileData;
> +
> struct OrderGlobalInits {
> unsigned int priority;
> unsigned int lex_order;
> @@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenType
> ARCEntrypoints *ARCData;
> llvm::MDNode *NoObjCARCExceptionsMetadata;
> RREntrypoints *RRData;
> + PGOProfileData *PGOData;
>
> // WeakRefReferences - A set of references that have only been seen via
> // a weakref so far. This is used to remove the weak of the reference if we
> @@ -479,6 +481,10 @@ public:
> return *RRData;
> }
>
> + PGOProfileData *getPGOData() const {
> + return PGOData;
> + }
> +
> llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
> return StaticLocalDeclMap[D];
> }
>
> Added: cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.cpp?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenPGO.cpp (added)
> +++ cfe/trunk/lib/CodeGen/CodeGenPGO.cpp Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,456 @@
> +//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Instrumentation-based profile-guided optimization
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "CodeGenPGO.h"
> +#include "CodeGenFunction.h"
> +#include "clang/AST/RecursiveASTVisitor.h"
> +#include "clang/AST/StmtVisitor.h"
> +#include "llvm/IR/MDBuilder.h"
> +#include "llvm/Support/FileSystem.h"
> +
> +using namespace clang;
> +using namespace CodeGen;
> +
> +static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
> + DiagnosticsEngine &Diags = CGM.getDiags();
> + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
> + Diags.Report(DiagID);
> +}
> +
> +PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
> + : CGM(CGM) {
> + if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
> + ReportBadPGOData(CGM, "failed to open pgo data file");
> + return;
> + }
> +
> + if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
> + ReportBadPGOData(CGM, "pgo data file too big");
> + return;
> + }
> +
> + // Scan through the data file and map each function to the corresponding
> + // file offset where its counts are stored.
> + const char *BufferStart = DataBuffer->getBufferStart();
> + const char *BufferEnd = DataBuffer->getBufferEnd();
> + const char *CurPtr = BufferStart;
> + while (CurPtr < BufferEnd) {
> + // Read the mangled function name.
> + const char *FuncName = CurPtr;
> + // FIXME: Something will need to be added to distinguish static functions.
> + CurPtr = strchr(CurPtr, ' ');
> + if (!CurPtr) {
> + ReportBadPGOData(CGM, "pgo data file has malformed function entry");
> + return;
> + }
> + StringRef MangledName(FuncName, CurPtr - FuncName);
> +
> + // Read the number of counters.
> + char *EndPtr;
> + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
> + if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
> + ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
> + return;
> + }
> + CurPtr = EndPtr;
> +
> + // There is one line for each counter; skip over those lines.
> + for (unsigned N = 0; N < NumCounters; ++N) {
> + CurPtr = strchr(++CurPtr, '\n');
> + if (!CurPtr) {
> + ReportBadPGOData(CGM, "pgo data file is missing some counter info");
> + return;
> + }
> + }
> +
> + // Skip over the blank line separating functions.
> + CurPtr += 2;
> +
> + DataOffsets[MangledName] = FuncName - BufferStart;
> + }
> +}
> +
> +bool PGOProfileData::getFunctionCounts(StringRef MangledName,
> + std::vector<uint64_t> &Counts) {
> + // Find the relevant section of the pgo-data file.
> + llvm::StringMap<unsigned>::const_iterator OffsetIter =
> + DataOffsets.find(MangledName);
> + if (OffsetIter == DataOffsets.end())
> + return true;
> + const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
> +
> + // Skip over the function name.
> + CurPtr = strchr(CurPtr, ' ');
> + assert(CurPtr && "pgo-data has corrupted function entry");
> +
> + // Read the number of counters.
> + char *EndPtr;
> + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
> + assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
> + "pgo-data file has corrupted number of counters");
> + CurPtr = EndPtr;
> +
> + Counts.reserve(NumCounters);
> +
> + for (unsigned N = 0; N < NumCounters; ++N) {
> + // Read the count value.
> + uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
> + if (EndPtr == CurPtr || *EndPtr != '\n') {
> + ReportBadPGOData(CGM, "pgo-data file has bad count value");
> + return true;
> + }
> + Counts.push_back(Count);
> + CurPtr = EndPtr + 1;
> + }
> +
> + // Make sure the number of counters matches up.
> + if (Counts.size() != NumCounters) {
> + ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
> + return true;
> + }
> +
> + return false;
> +}
> +
> +void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
> + if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
> + return;
> +
> + llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> +
> + llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
> + llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
> +
> + llvm::Function *WriteoutF =
> + CGM.getModule().getFunction("__llvm_pgo_writeout");
> + if (!WriteoutF) {
> + llvm::FunctionType *WriteoutFTy =
> + llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
> + WriteoutF = llvm::Function::Create(WriteoutFTy,
> + llvm::GlobalValue::InternalLinkage,
> + "__llvm_pgo_writeout", &CGM.getModule());
> + }
> + WriteoutF->setUnnamedAddr(true);
> + WriteoutF->addFnAttr(llvm::Attribute::NoInline);
> + if (CGM.getCodeGenOpts().DisableRedZone)
> + WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);
> +
> + llvm::BasicBlock *BB = WriteoutF->empty() ?
> + llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();
> +
> + CGBuilderTy PGOBuilder(BB);
> +
> + llvm::Instruction *I = BB->getTerminator();
> + if (!I)
> + I = PGOBuilder.CreateRetVoid();
> + PGOBuilder.SetInsertPoint(I);
> +
> + llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
> + llvm::Type *Args[] = {
> + Int8PtrTy, // const char *MangledName
> + Int32Ty, // uint32_t NumCounters
> + Int64PtrTy // uint64_t *Counters
> + };
> + llvm::FunctionType *FTy =
> + llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
> + llvm::Constant *EmitFunc =
> + CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);
> +
> + llvm::Constant *MangledName =
> + CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
> + MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
> + PGOBuilder.CreateCall3(EmitFunc, MangledName,
> + PGOBuilder.getInt32(NumRegionCounters),
> + PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
> +}
> +
> +llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
> + llvm::Function *WriteoutF =
> + CGM.getModule().getFunction("__llvm_pgo_writeout");
> + if (!WriteoutF)
> + return NULL;
> +
> + // Create a small bit of code that registers the "__llvm_pgo_writeout" to
> + // be executed at exit.
> + llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
> + if (F)
> + return NULL;
> +
> + llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> + llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
> + false);
> + F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
> + "__llvm_pgo_init", &CGM.getModule());
> + F->setUnnamedAddr(true);
> + F->setLinkage(llvm::GlobalValue::InternalLinkage);
> + F->addFnAttr(llvm::Attribute::NoInline);
> + if (CGM.getCodeGenOpts().DisableRedZone)
> + F->addFnAttr(llvm::Attribute::NoRedZone);
> +
> + llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
> + CGBuilderTy PGOBuilder(BB);
> +
> + FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
> + llvm::Type *Params[] = {
> + llvm::PointerType::get(FTy, 0)
> + };
> + FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);
> +
> + // Initialize the environment and register the local writeout function.
> + llvm::Constant *PGOInit =
> + CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
> + PGOBuilder.CreateCall(PGOInit, WriteoutF);
> + PGOBuilder.CreateRetVoid();
> +
> + return F;
> +}
> +
> +namespace {
> + /// A StmtVisitor that fills a map of statements to PGO counters.
> + struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
> + /// The next counter value to assign.
> + unsigned NextCounter;
> + /// The map of statements to counters.
> + llvm::DenseMap<const Stmt*, unsigned> *CounterMap;
> +
> + MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
> + NextCounter(0), CounterMap(CounterMap) {
> + }
> +
> + void VisitChildren(const Stmt *S) {
> + for (Stmt::const_child_range I = S->children(); I; ++I)
> + if (*I)
> + this->Visit(*I);
> + }
> + void VisitStmt(const Stmt *S) { VisitChildren(S); }
> +
> + /// Assign a counter to track entry to the function body
> + void VisitFunctionDecl(const FunctionDecl *S) {
> + (*CounterMap)[S->getBody()] = NextCounter++;
> + Visit(S->getBody());
> + }
> + /// Assign a counter to track the block following a label
> + void VisitLabelStmt(const LabelStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getSubStmt());
> + }
> + /// Assign three counters - one for the body of the loop, one for breaks
> + /// from the loop, and one for continues.
> + ///
> + /// The break and continue counters cover all such statements in this loop,
> + /// and are used in calculations to find the number of times the condition
> + /// and exit of the loop occur. They are needed so we can differentiate
> + /// these statements from non-local exits like return and goto.
> + void VisitWhileStmt(const WhileStmt *S) {
> + (*CounterMap)[S] = NextCounter;
> + NextCounter += 3;
> + Visit(S->getCond());
> + Visit(S->getBody());
> + }
> + /// Assign counters for the body of the loop, and for breaks and
> + /// continues. See VisitWhileStmt.
> + void VisitDoStmt(const DoStmt *S) {
> + (*CounterMap)[S] = NextCounter;
> + NextCounter += 3;
> + Visit(S->getBody());
> + Visit(S->getCond());
> + }
> + /// Assign counters for the body of the loop, and for breaks and
> + /// continues. See VisitWhileStmt.
> + void VisitForStmt(const ForStmt *S) {
> + (*CounterMap)[S] = NextCounter;
> + NextCounter += 3;
> + const Expr *E;
> + if ((E = S->getCond()))
> + Visit(E);
> + Visit(S->getBody());
> + if ((E = S->getInc()))
> + Visit(E);
> + }
> + /// Assign counters for the body of the loop, and for breaks and
> + /// continues. See VisitWhileStmt.
> + void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
> + (*CounterMap)[S] = NextCounter;
> + NextCounter += 3;
> + const Expr *E;
> + if ((E = S->getCond()))
> + Visit(E);
> + Visit(S->getBody());
> + if ((E = S->getInc()))
> + Visit(E);
> + }
> + /// Assign counters for the body of the loop, and for breaks and
> + /// continues. See VisitWhileStmt.
> + void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
> + (*CounterMap)[S] = NextCounter;
> + NextCounter += 3;
> + Visit(S->getElement());
> + Visit(S->getBody());
> + }
> + /// Assign a counter for the exit block of the switch statement.
> + void VisitSwitchStmt(const SwitchStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getCond());
> + Visit(S->getBody());
> + }
> + /// Assign a counter for a particular case in a switch. This counts jumps
> + /// from the switch header as well as fallthrough from the case before this
> + /// one.
> + void VisitCaseStmt(const CaseStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getSubStmt());
> + }
> + /// Assign a counter for the default case of a switch statement. The count
> + /// is the number of branches from the switch header to the default, and does
> + /// not include fallthrough from previous cases. If we have multiple
> + /// conditional branch blocks from the switch instruction to the default
> + /// block, as with large GNU case ranges, this is the counter for the last
> + /// edge in that series, rather than the first.
> + void VisitDefaultStmt(const DefaultStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getSubStmt());
> + }
> + /// Assign a counter for the "then" part of an if statement. The count for
> + /// the "else" part, if it exists, will be calculated from this counter.
> + void VisitIfStmt(const IfStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getCond());
> + Visit(S->getThen());
> + if (S->getElse())
> + Visit(S->getElse());
> + }
> + /// Assign a counter for the continuation block of a C++ try statement.
> + void VisitCXXTryStmt(const CXXTryStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getTryBlock());
> + for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
> + Visit(S->getHandler(I));
> + }
> + /// Assign a counter for a catch statement's handler block.
> + void VisitCXXCatchStmt(const CXXCatchStmt *S) {
> + (*CounterMap)[S] = NextCounter++;
> + Visit(S->getHandlerBlock());
> + }
> + /// Assign a counter for the "true" part of a conditional operator. The
> + /// count in the "false" part will be calculated from this counter.
> + void VisitConditionalOperator(const ConditionalOperator *E) {
> + (*CounterMap)[E] = NextCounter++;
> + Visit(E->getCond());
> + Visit(E->getTrueExpr());
> + Visit(E->getFalseExpr());
> + }
> + /// Assign a counter for the right hand side of a logical and operator.
> + void VisitBinLAnd(const BinaryOperator *E) {
> + (*CounterMap)[E] = NextCounter++;
> + Visit(E->getLHS());
> + Visit(E->getRHS());
> + }
> + /// Assign a counter for the right hand side of a logical or operator.
> + void VisitBinLOr(const BinaryOperator *E) {
> + (*CounterMap)[E] = NextCounter++;
> + Visit(E->getLHS());
> + Visit(E->getRHS());
> + }
> + };
> +}
> +
> +void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
> + bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
> + PGOProfileData *PGOData = CGM.getPGOData();
> + if (!InstrumentRegions && !PGOData)
> + return;
> + const Decl *D = GD.getDecl();
> + if (!D)
> + return;
> + mapRegionCounters(D);
> + if (InstrumentRegions)
> + emitCounterVariables();
> + if (PGOData)
> + loadRegionCounts(GD, PGOData);
> +}
> +
> +void CodeGenPGO::mapRegionCounters(const Decl *D) {
> + RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
> + MapRegionCounters Walker(RegionCounterMap);
> + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
> + Walker.VisitFunctionDecl(FD);
> + NumRegionCounters = Walker.NextCounter;
> +}
> +
> +void CodeGenPGO::emitCounterVariables() {
> + llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> + llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
> + NumRegionCounters);
> + RegionCounters =
> + new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
> + llvm::GlobalVariable::PrivateLinkage,
> + llvm::Constant::getNullValue(CounterTy),
> + "__llvm_pgo_ctr");
> +}
> +
> +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
> + if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
> + return;
> + llvm::Value *Addr =
> + Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
> + llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
> + Count = Builder.CreateAdd(Count, Builder.getInt64(1));
> + Builder.CreateStore(Count, Addr);
> +}
> +
> +void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
> + // For now, ignore the counts from the PGO data file only if the number of
> + // counters does not match. This could be tightened down in the future to
> + // ignore counts when the input changes in various ways, e.g., by comparing a
> + // hash value based on some characteristics of the input.
> + RegionCounts = new std::vector<uint64_t>();
> + if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
> + RegionCounts->size() != NumRegionCounters) {
> + delete RegionCounts;
> + RegionCounts = 0;
> + }
> +}
> +
> +void CodeGenPGO::destroyRegionCounters() {
> + if (RegionCounterMap != 0)
> + delete RegionCounterMap;
> + if (RegionCounts != 0)
> + delete RegionCounts;
> +}
> +
> +llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
> + uint64_t FalseCount) {
> + if (!TrueCount && !FalseCount)
> + return 0;
> +
> + llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> + // TODO: need to scale down to 32-bits
> + // According to Laplace's Rule of Succession, it is better to compute the
> + // weight based on the count plus 1.
> + return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
> +}
> +
> +llvm::MDNode *
> +CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
> + llvm::MDBuilder MDHelper(CGM.getLLVMContext());
> + // TODO: need to scale down to 32-bits, instead of just truncating.
> + // According to Laplace's Rule of Succession, it is better to compute the
> + // weight based on the count plus 1.
> + SmallVector<uint32_t, 16> ScaledWeights;
> + ScaledWeights.reserve(Weights.size());
> + for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
> + WI != WE; ++WI) {
> + ScaledWeights.push_back(*WI + 1);
> + }
> + return MDHelper.createBranchWeights(ScaledWeights);
> +}
>
> Added: cfe/trunk/lib/CodeGen/CodeGenPGO.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.h?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenPGO.h (added)
> +++ cfe/trunk/lib/CodeGen/CodeGenPGO.h Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,216 @@
> +//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Instrumentation-based profile-guided optimization
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef CLANG_CODEGEN_CODEGENPGO_H
> +#define CLANG_CODEGEN_CODEGENPGO_H
> +
> +#include "CGBuilder.h"
> +#include "CodeGenModule.h"
> +#include "CodeGenTypes.h"
> +#include "clang/Frontend/CodeGenOptions.h"
> +#include "llvm/ADT/OwningPtr.h"
> +#include "llvm/ADT/StringMap.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +
> +namespace clang {
> +namespace CodeGen {
> +class RegionCounter;
> +
> +/// The raw counter data from an instrumented PGO binary
> +class PGOProfileData {
> +private:
> + /// The PGO data
> + llvm::OwningPtr<llvm::MemoryBuffer> DataBuffer;
> + /// Offsets into DataBuffer for each function's counters
> + llvm::StringMap<unsigned> DataOffsets;
> + CodeGenModule &CGM;
> +public:
> + PGOProfileData(CodeGenModule &CGM, std::string Path);
> + /// Fill Counts with the profile data for the given function name. Returns
> + /// false on success.
> + bool getFunctionCounts(StringRef MangledName, std::vector<uint64_t> &Counts);
> +};
> +
> +/// Per-function PGO state. This class should generally not be used directly,
> +/// but instead through the CodeGenFunction and RegionCounter types.
> +class CodeGenPGO {
> +private:
> + CodeGenModule &CGM;
> +
> + unsigned NumRegionCounters;
> + llvm::GlobalVariable *RegionCounters;
> + llvm::DenseMap<const Stmt*, unsigned> *RegionCounterMap;
> + std::vector<uint64_t> *RegionCounts;
> + uint64_t CurrentRegionCount;
> +
> +public:
> + CodeGenPGO(CodeGenModule &CGM)
> + : CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0),
> + RegionCounts(0), CurrentRegionCount(0) {}
> + ~CodeGenPGO() {}
> +
> + /// Whether or not we have PGO region data for the current function. This is
> + /// false both when we have no data at all and when our data has been
> + /// discarded.
> + bool haveRegionCounts() const { return RegionCounts != 0; }
> +
> + /// Return the counter value of the current region.
> + uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
> + /// Return the counter value of the current region, or \p Min if it is larger.
> + uint64_t getCurrentRegionCountWithMin(uint64_t Min) {
> + return std::max(Min, CurrentRegionCount);
> + }
> + /// Set the counter value for the current region. This is used to keep track
> + /// of changes to the most recent counter from control flow and non-local
> + /// exits.
> + void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
> +
> + /// Calculate branch weights appropriate for PGO data
> + llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
> + llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
> +
> + /// Assign counters to regions and configure them for PGO of a given
> + /// function. Does nothing if neither instrumentation nor profile data is
> + /// in use; otherwise generates the counter global variables and/or loads
> + /// the PGO data for the counters, depending on which of the two is enabled.
> + void assignRegionCounters(GlobalDecl &GD);
> + /// Emit code to write counts for a given function to disk, if necessary.
> + void emitWriteoutFunction(GlobalDecl &GD);
> + /// Clean up region counter state. Must be called if assignRegionCounters is
> + /// used.
> + void destroyRegionCounters();
> + /// Emit the logic to register region counter write out functions. Returns a
> + /// function that implements this logic.
> + static llvm::Function *emitInitialization(CodeGenModule &CGM);
> +
> +private:
> + void mapRegionCounters(const Decl *D);
> + void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData);
> + void emitCounterVariables();
> +
> + /// Emit code to increment the counter at the given index
> + void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
> +
> + /// Return the region counter for the given statement. This should only be
> + /// called on statements that have a dedicated counter.
> + unsigned getRegionCounter(const Stmt *S) {
> + if (RegionCounterMap == 0)
> + return 0;
> + return (*RegionCounterMap)[S];
> + }
> +
> + /// Return the region count for the counter at the given index.
> + uint64_t getRegionCount(unsigned Counter) {
> + if (!haveRegionCounts())
> + return 0;
> + return (*RegionCounts)[Counter];
> + }
> +
> + friend class RegionCounter;
> +};
> +
> +/// A counter for a particular region. This is the primary interface through
> +/// which clients manage PGO counters and their values.
> +class RegionCounter {
> + CodeGenPGO *PGO;
> + unsigned Counter;
> + uint64_t Count;
> + uint64_t ParentCount;
> + uint64_t RegionCount;
> + int64_t Adjust;
> +
> + RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
> + : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
> + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
> +
> +public:
> + RegionCounter(CodeGenPGO &PGO, const Stmt *S)
> + : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
> + Count(PGO.getRegionCount(Counter)),
> + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
> +
> + /// Get the value of the counter. In most cases this is the number of times
> + /// the region of the counter was entered, but for switch labels it's the
> + /// number of direct jumps to that label.
> + uint64_t getCount() const { return Count; }
> + /// Get the value of the counter with adjustments applied. Adjustments occur
> + /// when control enters or leaves the region abnormally, ie, if there is a
> + /// jump to a label within the region, or if the function can return from
> + /// within the region. The adjusted count, then, is the value of the counter
> + /// at the end of the region.
> + uint64_t getAdjustedCount() const {
> + assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count");
> + return Count + Adjust;
> + }
> + /// Get the value of the counter in this region's parent, ie, the region that
> + /// was active when this region began. This is useful for deriving counts in
> + /// implicitly counted regions, like the false case of a condition or the
> + /// normal exits of a loop.
> + uint64_t getParentCount() const { return ParentCount; }
> +
> + /// Get the number of times the condition of a loop will evaluate false. This
> + /// is the number of times we enter the loop, adjusted by the difference
> + /// between entering and exiting the loop body normally, excepting that
> + /// 'continue' statements also bring us back here.
> + ///
> + /// Undefined if this counter is not counting a loop.
> + uint64_t getLoopExitCount() const {
> + return getParentCount() + getContinueCounter().getCount() +
> + getAdjustedCount() - getCount();
> + }
> + /// Get the associated break counter. Undefined if this counter is not
> + /// counting a loop.
> + RegionCounter getBreakCounter() const {
> + return RegionCounter(*PGO, Counter + 1);
> + }
> + /// Get the associated continue counter. Undefined if this counter is not
> + /// counting a loop.
> + RegionCounter getContinueCounter() const {
> + return RegionCounter(*PGO, Counter + 2);
> + }
> +
> + /// Activate the counter by emitting an increment and starting to track
> + /// adjustments. If AddIncomingFallThrough is true, the current region count
> + /// will be added to the counter for the purposes of tracking the region.
> + void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
> + RegionCount = Count;
> + if (AddIncomingFallThrough)
> + RegionCount += PGO->getCurrentRegionCount();
> + PGO->setCurrentRegionCount(RegionCount);
> + PGO->emitCounterIncrement(Builder, Counter);
> + }
> + /// For counters on boolean branches, begins tracking adjustments for the
> + /// uncounted path.
> + void beginElseRegion() {
> + RegionCount = ParentCount - Count;
> + PGO->setCurrentRegionCount(RegionCount);
> + }
> +
> + /// Control may either enter or leave the region, so the count at the end may
> + /// be different from the start. Call this to track that adjustment without
> + /// modifying the current count. Must not be called before one of beginRegion
> + /// or beginElseRegion.
> + void adjustFallThroughCount() {
> + Adjust += PGO->getCurrentRegionCount() - RegionCount;
> + }
> + /// Commit all adjustments to the current region. This should be called after
> + /// all blocks that adjust the fallthrough count have been emitted.
> + void applyAdjustmentsToRegion() {
> + PGO->setCurrentRegionCount(ParentCount + Adjust);
> + }
> +};
> +
> +} // end namespace CodeGen
> +} // end namespace clang
> +
> +#endif
>
> Added: cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata (added)
> +++ cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,127 @@
> +simple_loops 10
> +1
> +100
> +0
> +0
> +100
> +0
> +0
> +76
> +0
> +0
> +
> +conditionals 13
> +1
> +100
> +0
> +0
> +50
> +50
> +33
> +33
> +16
> +99
> +100
> +99
> +100
> +
> +early_exits 13
> +1
> +0
> +51
> +1
> +25
> +1
> +25
> +1
> +26
> +0
> +0
> +1
> +0
> +
> +jumps 30
> +1
> +1
> +0
> +0
> +0
> +1
> +0
> +0
> +0
> +0
> +1
> +0
> +1
> +2
> +3
> +2
> +0
> +0
> +0
> +3
> +0
> +1
> +1
> +1
> +10
> +0
> +0
> +0
> +10
> +9
> +
> +switches 21
> +1
> +1
> +1
> +15
> +0
> +7
> +7
> +1
> +0
> +3
> +2
> +3
> +3
> +4
> +4
> +0
> +4
> +4
> +5
> +1
> +0
> +
> +big_switch 19
> +1
> +32
> +0
> +0
> +32
> +1
> +0
> +2
> +1
> +11
> +11
> +1
> +1
> +15
> +15
> +1
> +1
> +2
> +2
> +
> +no_usable_data 5
> +1
> +1
> +1
> +1
> +1
> +
> +main 1
> +1
>
> Added: cfe/trunk/test/CodeGen/instr-profile.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/instr-profile.c?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/instr-profile.c (added)
> +++ cfe/trunk/test/CodeGen/instr-profile.c Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,428 @@
> +// Test that instrumentation based profiling feeds branch prediction
> +// correctly. This tests both generation of profile data and use of the same,
> +// and the input file for the -fprofile-instr-use case is expected to be the result
> +// of running the program generated by the -fprofile-instr-generate case
> +// (excepting no_usable_data). As such, main() should call every function in
> +// this test.
> +
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
> +
> +// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer
> +// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
> +// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
> +// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer
> +// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer
> +// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer
> +// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer
> +// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64] zeroinitializer
> +
> +// PGOGEN-LABEL: @simple_loops()
> +// PGOUSE-LABEL: @simple_loops()
> +// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0
> +void simple_loops() {
> + int i;
> + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]]
> + for (i = 0; i < 100; ++i) {
> + }
> + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4
> + // PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]]
> + while (i > 0)
> + i--;
> + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7
> + // PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]]
> + do {} while (i++ < 75);
> +
> + // PGOGEN-NOT: store {{.*}} @[[SLC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @conditionals()
> +// PGOUSE-LABEL: @conditionals()
> +// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 0
> +void conditionals() {
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[IF1:[0-9]+]]
> + for (int i = 0; i < 100; ++i) {
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 4
> + // PGOUSE: br {{.*}} !prof ![[IF2:[0-9]+]]
> + if (i % 2) {
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 5
> + // PGOUSE: br {{.*}} !prof ![[IF3:[0-9]+]]
> + if (i) {}
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 6
> + // PGOUSE: br {{.*}} !prof ![[IF4:[0-9]+]]
> + } else if (i % 3) {
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 7
> + // PGOUSE: br {{.*}} !prof ![[IF5:[0-9]+]]
> + if (i) {}
> + } else {
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 8
> + // PGOUSE: br {{.*}} !prof ![[IF6:[0-9]+]]
> + if (i) {}
> + }
> +
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 10
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 9
> + // PGOUSE: br {{.*}} !prof ![[IF7:[0-9]+]]
> + if (1 && i) {}
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 12
> + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 11
> + // PGOUSE: br {{.*}} !prof ![[IF8:[0-9]+]]
> + if (0 || i) {}
> + }
> +
> + // PGOGEN-NOT: store {{.*}} @[[IFC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @early_exits()
> +// PGOUSE-LABEL: @early_exits()
> +// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 0
> +void early_exits() {
> + int i = 0;
> +
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[EE1:[0-9]+]]
> + if (i) {}
> +
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 2
> + // PGOUSE: br {{.*}} !prof ![[EE2:[0-9]+]]
> + while (i < 100) {
> + i++;
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 5
> + // PGOUSE: br {{.*}} !prof ![[EE3:[0-9]+]]
> + if (i > 50)
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 3
> + break;
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 6
> + // PGOUSE: br {{.*}} !prof ![[EE4:[0-9]+]]
> + if (i % 2)
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 4
> + continue;
> + }
> +
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 7
> + // PGOUSE: br {{.*}} !prof ![[EE5:[0-9]+]]
> + if (i) {}
> +
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 8
> + do {
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 11
> + // PGOUSE: br {{.*}} !prof ![[EE6:[0-9]+]]
> + if (i > 75)
> + return;
> + else
> + i++;
> + // PGOUSE: br {{.*}} !prof ![[EE7:[0-9]+]]
> + } while (i < 100);
> +
> + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 12
> + // Never reached -> no weights
> + if (i) {}
> +
> + // PGOGEN-NOT: store {{.*}} @[[EEC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @jumps()
> +// PGOUSE-LABEL: @jumps()
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 0
> +void jumps() {
> + int i;
> +
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[JM1:[0-9]+]]
> + for (i = 0; i < 2; ++i) {
> + goto outofloop;
> + // Never reached -> no weights
> + if (i) {}
> + }
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 5
> +outofloop:
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 6
> + // PGOUSE: br {{.*}} !prof ![[JM2:[0-9]+]]
> + if (i) {}
> +
> + goto loop1;
> +
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 7
> + // PGOUSE: br {{.*}} !prof ![[JM3:[0-9]+]]
> + while (i) {
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 10
> + loop1:
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 11
> + // PGOUSE: br {{.*}} !prof ![[JM4:[0-9]+]]
> + if (i) {}
> + }
> +
> + goto loop2;
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 12
> +first:
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 13
> +second:
> +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 14
> +third:
> + i++;
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 15
> + // PGOUSE: br {{.*}} !prof ![[JM5:[0-9]+]]
> + if (i < 3)
> + goto loop2;
> +
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 16
> + // PGOUSE: br {{.*}} !prof ![[JM6:[0-9]+]]
> + while (i < 3) {
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 19
> + loop2:
> + // PGOUSE: switch {{.*}} [
> + // PGOUSE: ], !prof ![[JM7:[0-9]+]]
> + switch (i) {
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 21
> + case 0:
> + goto first;
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 22
> + case 1:
> + goto second;
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 23
> + case 2:
> + goto third;
> + }
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 20
> + }
> +
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 24
> + // PGOUSE: br {{.*}} !prof ![[JM8:[0-9]+]]
> + for (i = 0; i < 10; ++i) {
> + goto withinloop;
> + // never reached -> no weights
> + if (i) {}
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 28
> + withinloop:
> + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 29
> + // PGOUSE: br {{.*}} !prof ![[JM9:[0-9]+]]
> + if (i) {}
> + }
> +
> + // PGOGEN-NOT: store {{.*}} @[[JMC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @switches()
> +// PGOUSE-LABEL: @switches()
> +// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 0
> +void switches() {
> + static int weights[] = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5};
> +
> + // No cases -> no weights
> + switch (weights[0]) {
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 2
> + default:
> + break;
> + }
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 1
> +
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 3
> + // PGOUSE: br {{.*}} !prof ![[SW1:[0-9]+]]
> + for (int i = 0, len = sizeof(weights) / sizeof(weights[0]); i < len; ++i) {
> + // PGOUSE: switch {{.*}} [
> + // PGOUSE: ], !prof ![[SW2:[0-9]+]]
> + switch (i[weights]) {
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 7
> + case 1:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 8
> + // PGOUSE: br {{.*}} !prof ![[SW3:[0-9]+]]
> + if (i) {}
> + // fallthrough
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 9
> + case 2:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 10
> + // PGOUSE: br {{.*}} !prof ![[SW4:[0-9]+]]
> + if (i) {}
> + break;
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 11
> + case 3:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 12
> + // PGOUSE: br {{.*}} !prof ![[SW5:[0-9]+]]
> + if (i) {}
> + continue;
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 13
> + case 4:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 14
> + // PGOUSE: br {{.*}} !prof ![[SW6:[0-9]+]]
> + if (i) {}
> + // PGOUSE: switch {{.*}} [
> + // PGOUSE: ], !prof ![[SW7:[0-9]+]]
> + switch (i) {
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 16
> + case 6 ... 9:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 17
> + // PGOUSE: br {{.*}} !prof ![[SW8:[0-9]+]]
> + if (i) {}
> + continue;
> + }
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 15
> +
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 18
> + default:
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 19
> + // PGOUSE: br {{.*}} !prof ![[SW9:[0-9]+]]
> + if (i == len - 1)
> + return;
> + }
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 6
> + }
> +
> + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 20
> + // Never reached -> no weights
> + if (weights[0]) {}
> +
> + // PGOGEN-NOT: store {{.*}} @[[SWC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOGEN-LABEL: @big_switch()
> +// PGOUSE-LABEL: @big_switch()
> +// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 0
> +void big_switch() {
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[BS1:[0-9]+]]
> + for (int i = 0; i < 32; ++i) {
> + // PGOUSE: switch {{.*}} [
> + // PGOUSE: ], !prof ![[BS2:[0-9]+]]
> + switch (1 << i) {
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 5
> + case (1 << 0):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 6
> + // PGOUSE: br {{.*}} !prof ![[BS3:[0-9]+]]
> + if (i) {}
> + // fallthrough
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 7
> + case (1 << 1):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 8
> + // PGOUSE: br {{.*}} !prof ![[BS4:[0-9]+]]
> + if (i) {}
> + break;
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 9
> + case (1 << 2) ... (1 << 12):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 10
> + // PGOUSE: br {{.*}} !prof ![[BS5:[0-9]+]]
> + if (i) {}
> + break;
> + // The branch for the large case range above appears after the case body
> + // PGOUSE: br {{.*}} !prof ![[BS6:[0-9]+]]
> +
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 11
> + case (1 << 13):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 12
> + // PGOUSE: br {{.*}} !prof ![[BS7:[0-9]+]]
> + if (i) {}
> + break;
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 13
> + case (1 << 14) ... (1 << 28):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 14
> + // PGOUSE: br {{.*}} !prof ![[BS8:[0-9]+]]
> + if (i) {}
> + break;
> + // The branch for the large case range above appears after the case body
> + // PGOUSE: br {{.*}} !prof ![[BS9:[0-9]+]]
> +
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 15
> + case (1 << 29) ... ((1 << 29) + 1):
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 16
> + // PGOUSE: br {{.*}} !prof ![[BS10:[0-9]+]]
> + if (i) {}
> + break;
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 17
> + default:
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 18
> + // PGOUSE: br {{.*}} !prof ![[BS11:[0-9]+]]
> + if (i) {}
> + break;
> + }
> + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 4
> + }
> +
> + // PGOGEN-NOT: store {{.*}} @[[BSC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> + // PGOUSE: ret void
> +}
> +
> +// PGOGEN-LABEL: @no_usable_data()
> +// PGOUSE-LABEL: @no_usable_data()
> +// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 0
> +void no_usable_data() {
> + // The input data for PGOUSE is deliberately invalid for this function, so
> + // that we can test that we reject and ignore it properly.
> + int i = 0;
> +
> + // PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 1
> + if (i) {}
> +
> + // PGOGEN-NOT: store {{.*}} @[[NOC]],
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> +}
> +
> +// PGOUSE-DAG: ![[SL1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[SL2]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[SL3]] = metadata !{metadata !"branch_weights", i32 76, i32 2}
> +
> +// PGOUSE-DAG: ![[EE1]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[EE2]] = metadata !{metadata !"branch_weights", i32 52, i32 1}
> +// PGOUSE-DAG: ![[EE3]] = metadata !{metadata !"branch_weights", i32 2, i32 51}
> +// PGOUSE-DAG: ![[EE4]] = metadata !{metadata !"branch_weights", i32 26, i32 26}
> +// PGOUSE-DAG: ![[EE5]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[EE6]] = metadata !{metadata !"branch_weights", i32 2, i32 26}
> +// PGOUSE-DAG: ![[EE7]] = metadata !{metadata !"branch_weights", i32 26, i32 1}
> +
> +// PGOUSE-DAG: ![[IF1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[IF2]] = metadata !{metadata !"branch_weights", i32 51, i32 51}
> +// PGOUSE-DAG: ![[IF3]] = metadata !{metadata !"branch_weights", i32 51, i32 1}
> +// PGOUSE-DAG: ![[IF4]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
> +// PGOUSE-DAG: ![[IF5]] = metadata !{metadata !"branch_weights", i32 34, i32 1}
> +// PGOUSE-DAG: ![[IF6]] = metadata !{metadata !"branch_weights", i32 17, i32 2}
> +// PGOUSE-DAG: ![[IF7]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
> +// PGOUSE-DAG: ![[IF8]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
> +
> +// PGOUSE-DAG: ![[JM1]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[JM2]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM4]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM5]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
> +// PGOUSE-DAG: ![[JM6]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[JM7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2}
> +// PGOUSE-DAG: ![[JM8]] = metadata !{metadata !"branch_weights", i32 11, i32 2}
> +// PGOUSE-DAG: ![[JM9]] = metadata !{metadata !"branch_weights", i32 10, i32 2}
> +
> +// PGOUSE-DAG: ![[SW1]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
> +// PGOUSE-DAG: ![[SW2]] = metadata !{metadata !"branch_weights", i32 6, i32 2, i32 3, i32 4, i32 5}
> +// PGOUSE-DAG: ![[SW3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[SW4]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
> +// PGOUSE-DAG: ![[SW5]] = metadata !{metadata !"branch_weights", i32 4, i32 1}
> +// PGOUSE-DAG: ![[SW6]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
> +// PGOUSE-DAG: ![[SW7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2, i32 2}
> +// PGOUSE-DAG: ![[SW8]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
> +// PGOUSE-DAG: ![[SW9]] = metadata !{metadata !"branch_weights", i32 2, i32 5}
> +
> +// PGOUSE-DAG: ![[BS1]] = metadata !{metadata !"branch_weights", i32 33, i32 2}
> +// PGOUSE-DAG: ![[BS2]] = metadata !{metadata !"branch_weights", i32 29, i32 2, i32 2, i32 2, i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
> +// PGOUSE-DAG: ![[BS4]] = metadata !{metadata !"branch_weights", i32 2, i32 2}
> +// PGOUSE-DAG: ![[BS5]] = metadata !{metadata !"branch_weights", i32 12, i32 1}
> +// PGOUSE-DAG: ![[BS6]] = metadata !{metadata !"branch_weights", i32 12, i32 3}
> +// PGOUSE-DAG: ![[BS7]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS8]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
> +// PGOUSE-DAG: ![[BS9]] = metadata !{metadata !"branch_weights", i32 16, i32 14}
> +// PGOUSE-DAG: ![[BS10]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
> +// PGOUSE-DAG: ![[BS11]] = metadata !{metadata !"branch_weights", i32 3, i32 1}
> +
> +int main(int argc, const char *argv[]) {
> + simple_loops();
> + conditionals();
> + early_exits();
> + jumps();
> + switches();
> + big_switch();
> + no_usable_data();
> + return 0;
> +}
>
> Added: cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata (added)
> +++ cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,16 @@
> +_Z6throwsv 11
> +1
> +100
> +0
> +0
> +100
> +66
> +33
> +17
> +50
> +33
> +100
> +
> +main 1
> +1
> +
>
> Added: cfe/trunk/test/CodeGenCXX/instr-profile.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/instr-profile.cpp?rev=198640&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGenCXX/instr-profile.cpp (added)
> +++ cfe/trunk/test/CodeGenCXX/instr-profile.cpp Mon Jan 6 16:27:43 2014
> @@ -0,0 +1,73 @@
> +// Test that instrumentation based profiling feeds branch prediction
> +// correctly. This tests both generation of profile data and use of the same,
> +// and the input file for the -fprofile-instr-use case is expected to be the
> +// result of running the program generated by the -fprofile-instr-generate
> +// case. As such, main() should call every function in this test.
> +
> +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
> +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN-EXC %s
> +
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
> +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE-EXC %s
> +
> +// PGOGEN: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
> +// PGOGEN-EXC: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
> +
> +// PGOGEN-LABEL: @_Z6throwsv()
> +// PGOUSE-LABEL: @_Z6throwsv()
> +// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 0
> +void throws() {
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 1
> + // PGOUSE: br {{.*}} !prof ![[TH1:[0-9]+]]
> + for (int i = 0; i < 100; ++i) {
> + try {
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 5
> + // PGOUSE: br {{.*}} !prof ![[TH2:[0-9]+]]
> + if (i % 3) {
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 6
> + // PGOUSE: br {{.*}} !prof ![[TH3:[0-9]+]]
> + if (i < 50)
> + throw 1;
> + } else {
> + // The catch block may be emitted after the throw above. We can skip it
> + // by looking for an else block, but this will break if anyone puts an
> + // else in the catch.
> + // PGOUSE: if.else{{.*}}:
> + // PGOGEN: if.else{{.*}}:
> +
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 7
> + // PGOUSE: br {{.*}} !prof ![[TH4:[0-9]+]]
> + if (i >= 50)
> + throw 0;
> + }
> + } catch (int e) {
> + // PGOUSE-EXC: catch{{.*}}:
> + // PGOGEN-EXC: catch{{.*}}:
> +
> + // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 8
> + // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 9
> + // PGOUSE-EXC: br {{.*}} !prof ![[TH5:[0-9]+]]
> + if (e) {}
> + }
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 4
> +
> + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 10
> + // PGOUSE: br {{.*}} !prof ![[TH6:[0-9]+]]
> + if (i < 100) {}
> + }
> +
> + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
> + // PGOUSE: ret void
> +}
> +
> +// PGOUSE-DAG: ![[TH1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
> +// PGOUSE-DAG: ![[TH2]] = metadata !{metadata !"branch_weights", i32 67, i32 35}
> +// PGOUSE-DAG: ![[TH3]] = metadata !{metadata !"branch_weights", i32 34, i32 34}
> +// PGOUSE-DAG: ![[TH4]] = metadata !{metadata !"branch_weights", i32 18, i32 18}
> +// PGOUSE-EXC: ![[TH5]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
> +// PGOUSE-DAG: ![[TH6]] = metadata !{metadata !"branch_weights", i32 101, i32 1}
> +
> +int main(int argc, const char *argv[]) {
> + throws();
> + return 0;
> +}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list