r198640 - CodeGen: Initial instrumentation based PGO implementation

Justin Bogner mail at justinbogner.com
Mon Jan 6 14:27:44 PST 2014


Author: bogner
Date: Mon Jan  6 16:27:43 2014
New Revision: 198640

URL: http://llvm.org/viewvc/llvm-project?rev=198640&view=rev
Log:
CodeGen: Initial instrumentation based PGO implementation

Added:
    cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
    cfe/trunk/lib/CodeGen/CodeGenPGO.h
    cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
    cfe/trunk/test/CodeGen/instr-profile.c
    cfe/trunk/test/CodeGenCXX/Inputs/
    cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
    cfe/trunk/test/CodeGenCXX/instr-profile.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
    cfe/trunk/lib/CodeGen/CGCall.cpp
    cfe/trunk/lib/CodeGen/CGException.cpp
    cfe/trunk/lib/CodeGen/CGExpr.cpp
    cfe/trunk/lib/CodeGen/CGExprAgg.cpp
    cfe/trunk/lib/CodeGen/CGExprComplex.cpp
    cfe/trunk/lib/CodeGen/CGExprScalar.cpp
    cfe/trunk/lib/CodeGen/CGObjC.cpp
    cfe/trunk/lib/CodeGen/CGStmt.cpp
    cfe/trunk/lib/CodeGen/CMakeLists.txt
    cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/lib/CodeGen/CodeGenModule.h

Modified: cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDARuntime.cpp Mon Jan  6 16:27:43 2014
@@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCall
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
 
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
+  CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
+                           /*TrueCount=*/0);
 
   eval.begin(CGF);
   CGF.EmitBlock(ConfigOKBlock);

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jan  6 16:27:43 2014
@@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntim
     call->setCallingConv(getRuntimeCC());
     Builder.CreateUnreachable();
   }
+  PGO.setCurrentRegionCount(0);
 }
 
 /// Emits a call or invoke instruction to the given nullary runtime

Modified: cfe/trunk/lib/CodeGen/CGException.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGException.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGException.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGException.cpp Mon Jan  6 16:27:43 2014
@@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(con
     // Initialize the catch variable and set up the cleanups.
     BeginCatch(*this, C);
 
+    // Emit the PGO counter increment
+    RegionCounter CatchCnt = getPGORegionCounter(C);
+    CatchCnt.beginRegion(Builder);
+
     // Perform the body of the catch.
     EmitStmt(C->getHandlerBlock());
 
@@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(con
       Builder.CreateBr(ContBB);
   }
 
+  RegionCounter ContCnt = getPGORegionCounter(&S);
   EmitBlock(ContBB);
+  ContCnt.beginRegion(Builder);
 }
 
 namespace {

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Mon Jan  6 16:27:43 2014
@@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const Abst
   }
 
   OpaqueValueMapping binding(*this, expr);
+  RegionCounter Cnt = getPGORegionCounter(expr);
 
   const Expr *condExpr = expr->getCond();
   bool CondExprBool;
@@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const Abst
     const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
     if (!CondExprBool) std::swap(live, dead);
 
-    if (!ContainsLabel(dead))
+    if (!ContainsLabel(dead)) {
+      // If the true case is live, we need to track its region
+      if (CondExprBool)
+        Cnt.beginRegion(Builder);
       return EmitLValue(live);
+    }
   }
 
   llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
@@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const Abst
   llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
 
   ConditionalEvaluation eval(*this);
-  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
+  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
 
   // Any temporaries created here are conditional.
   EmitBlock(lhsBlock);
+  Cnt.beginRegion(Builder);
   eval.begin(*this);
   LValue lhs = EmitLValue(expr->getTrueExpr());
   eval.end(*this);
+  Cnt.adjustFallThroughCount();
 
   if (!lhs.isSimple())
     return EmitUnsupportedLValue(expr, "conditional operator");
@@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const Abst
 
   // Any temporaries created here are conditional.
   EmitBlock(rhsBlock);
+  Cnt.beginElseRegion();
   eval.begin(*this);
   LValue rhs = EmitLValue(expr->getFalseExpr());
   eval.end(*this);
+  Cnt.adjustFallThroughCount();
   if (!rhs.isSimple())
     return EmitUnsupportedLValue(expr, "conditional operator");
   rhsBlock = Builder.GetInsertBlock();
 
   EmitBlock(contBlock);
+  Cnt.applyAdjustmentsToRegion();
 
   llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2,
                                          "cond-lvalue");

Modified: cfe/trunk/lib/CodeGen/CGExprAgg.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprAgg.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprAgg.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprAgg.cpp Mon Jan  6 16:27:43 2014
@@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const A
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
 
+  RegionCounter Cnt = CGF.getPGORegionCounter(E);
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
 
   // Save whether the destination's lifetime is externally managed.
   bool isExternallyDestructed = Dest.isExternallyDestructed();
 
   eval.begin(CGF);
   CGF.EmitBlock(LHSBlock);
+  Cnt.beginRegion(Builder);
   Visit(E->getTrueExpr());
+  Cnt.adjustFallThroughCount();
   eval.end(CGF);
 
   assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
@@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const A
 
   eval.begin(CGF);
   CGF.EmitBlock(RHSBlock);
+  Cnt.beginElseRegion();
   Visit(E->getFalseExpr());
+  Cnt.adjustFallThroughCount();
   eval.end(CGF);
 
   CGF.EmitBlock(ContBlock);
+  Cnt.applyAdjustmentsToRegion();
 }
 
 void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {

Modified: cfe/trunk/lib/CodeGen/CGExprComplex.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprComplex.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprComplex.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprComplex.cpp Mon Jan  6 16:27:43 2014
@@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const A
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
 
+  RegionCounter Cnt = CGF.getPGORegionCounter(E);
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
 
   eval.begin(CGF);
   CGF.EmitBlock(LHSBlock);
+  Cnt.beginRegion(Builder);
   ComplexPairTy LHS = Visit(E->getTrueExpr());
+  Cnt.adjustFallThroughCount();
   LHSBlock = Builder.GetInsertBlock();
   CGF.EmitBranch(ContBlock);
   eval.end(CGF);
 
   eval.begin(CGF);
   CGF.EmitBlock(RHSBlock);
+  Cnt.beginElseRegion();
   ComplexPairTy RHS = Visit(E->getFalseExpr());
+  Cnt.adjustFallThroughCount();
   RHSBlock = Builder.GetInsertBlock();
   CGF.EmitBlock(ContBlock);
   eval.end(CGF);
+  Cnt.applyAdjustmentsToRegion();
 
   // Create a PHI node for the real part.
   llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r");

Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Mon Jan  6 16:27:43 2014
@@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign
 }
 
 Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
+  RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
   // Perform vector logical and on comparisons with zero vectors.
   if (E->getType()->isVectorType()) {
+    Cnt.beginRegion(Builder);
+
     Value *LHS = Visit(E->getLHS());
     Value *RHS = Visit(E->getRHS());
     Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
   bool LHSCondVal;
   if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
     if (LHSCondVal) { // If we have 1 && X, just emit X.
+      Cnt.beginRegion(Builder);
+
       Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
       // ZExt result to int or bool.
       return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
@@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
   CodeGenFunction::ConditionalEvaluation eval(CGF);
 
   // Branch on the LHS first.  If it is false, go to the failure (cont) block.
-  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
+  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
 
   // Any edges into the ContBlock are now from an (indeterminate number of)
   // edges from this first condition.  All of these values will be false.  Start
@@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
 
   eval.begin(CGF);
   CGF.EmitBlock(RHSBlock);
+  Cnt.beginRegion(Builder);
   Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
+  Cnt.adjustFallThroughCount();
   eval.end(CGF);
 
   // Reaquire the RHS block, as there may be subblocks inserted.
@@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(c
     Builder.SetCurrentDebugLocation(llvm::DebugLoc());
   CGF.EmitBlock(ContBlock);
   PN->addIncoming(RHSCond, RHSBlock);
+  Cnt.applyAdjustmentsToRegion();
 
   // ZExt result to int.
   return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
 }
 
 Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
+  RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
   // Perform vector logical or on comparisons with zero vectors.
   if (E->getType()->isVectorType()) {
+    Cnt.beginRegion(Builder);
+
     Value *LHS = Visit(E->getLHS());
     Value *RHS = Visit(E->getRHS());
     Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
   bool LHSCondVal;
   if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
     if (!LHSCondVal) { // If we have 0 || X, just emit X.
+      Cnt.beginRegion(Builder);
+
       Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
       // ZExt result to int or bool.
       return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
@@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(co
   CodeGenFunction::ConditionalEvaluation eval(CGF);
 
   // Branch on the LHS first.  If it is true, go to the success (cont) block.
-  CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
+  CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
+                           Cnt.getParentCount() - Cnt.getCount());
 
   // Any edges into the ContBlock are now from an (indeterminate number of)
   // edges from this first condition.  All of these values will be true.  Start
@@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(co
 
   // Emit the RHS condition as a bool value.
   CGF.EmitBlock(RHSBlock);
+  Cnt.beginRegion(Builder);
   Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
+  Cnt.adjustFallThroughCount();
 
   eval.end(CGF);
 
@@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(co
   // into the phi node for the edge with the value of RHSCond.
   CGF.EmitBlock(ContBlock);
   PN->addIncoming(RHSCond, RHSBlock);
+  Cnt.applyAdjustmentsToRegion();
 
   // ZExt result to int.
   return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext");
@@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const A
 
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+  RegionCounter Cnt = CGF.getPGORegionCounter(E);
 
   Expr *condExpr = E->getCond();
   Expr *lhsExpr = E->getTrueExpr();
@@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const A
 
     // If the dead side doesn't have labels we need, just emit the Live part.
     if (!CGF.ContainsLabel(dead)) {
+      if (CondExprBool)
+        Cnt.beginRegion(Builder);
       Value *Result = Visit(live);
 
       // If the live part is a throw expression, it acts like it has a void
@@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const A
   // the select function.
   if (CGF.getLangOpts().OpenCL
       && condExpr->getType()->isVectorType()) {
+    Cnt.beginRegion(Builder);
+
     llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
     llvm::Value *LHS = Visit(lhsExpr);
     llvm::Value *RHS = Visit(rhsExpr);
@@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const A
   // safe to evaluate the LHS and RHS unconditionally.
   if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
       isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
+    Cnt.beginRegion(Builder);
+
     llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
     llvm::Value *LHS = Visit(lhsExpr);
     llvm::Value *RHS = Visit(rhsExpr);
@@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const A
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
 
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
+  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
 
   CGF.EmitBlock(LHSBlock);
+  Cnt.beginRegion(Builder);
   eval.begin(CGF);
   Value *LHS = Visit(lhsExpr);
   eval.end(CGF);
+  Cnt.adjustFallThroughCount();
 
   LHSBlock = Builder.GetInsertBlock();
   Builder.CreateBr(ContBlock);
 
   CGF.EmitBlock(RHSBlock);
+  Cnt.beginElseRegion();
   eval.begin(CGF);
   Value *RHS = Visit(rhsExpr);
   eval.end(CGF);
+  Cnt.adjustFallThroughCount();
 
   RHSBlock = Builder.GetInsertBlock();
   CGF.EmitBlock(ContBlock);
+  Cnt.applyAdjustmentsToRegion();
 
   // If the LHS or RHS is a throw expression, it will be legitimately null.
   if (!LHS)

Modified: cfe/trunk/lib/CodeGen/CGObjC.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGObjC.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGObjC.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGObjC.cpp Mon Jan  6 16:27:43 2014
@@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollect
   llvm::Value *initialMutations =
     Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations");
 
+  RegionCounter Cnt = getPGORegionCounter(&S);
+
   // Start looping.  This is the point we return to whenever we have a
   // fresh, non-empty batch of objects.
   llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody");
   EmitBlock(LoopBodyBB);
+  Cnt.beginRegion(Builder);
 
   // The current index into the buffer.
   llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index");
@@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollect
     EmitAutoVarCleanups(variable);
 
   // Perform the loop body, setting up break and continue labels.
-  BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody));
+  BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt));
   {
     RunCleanupsScope Scope(*this);
     EmitStmt(S.getBody());
@@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollect
   llvm::Value *indexPlusOne
     = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
 
+  // TODO: We should probably model this as a "continue" for PGO
   // If we haven't overrun the buffer yet, we can continue.
   Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
                        LoopBodyBB, FetchMoreBB);
@@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollect
   index->addIncoming(zero, Builder.GetInsertBlock());
   count->addIncoming(refetchCount, Builder.GetInsertBlock());
 
+  // TODO: We should be applying PGO weights here, but this needs to handle the
+  // branch before FetchMoreBB or we risk getting the numbers wrong.
   Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero),
                        EmptyBB, LoopBodyBB);
 
@@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollect
     PopCleanupBlock();
 
   EmitBlock(LoopEnd.getBlock());
+  // TODO: Once we calculate PGO weights above, set the region count here
 }
 
 void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) {

Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp Mon Jan  6 16:27:43 2014
@@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const La
     ResolveBranchFixups(Dest.getBlock());
   }
 
+  RegionCounter Cnt = getPGORegionCounter(D->getStmt());
   EmitBlock(Dest.getBlock());
+  Cnt.beginRegion(Builder);
 }
 
 /// Change the cleanup scope of the labels in this lexical scope to
@@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const
     EmitStopPoint(&S);
 
   EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
+  PGO.setCurrentRegionCount(0);
 }
 
 
 void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
   if (const LabelDecl *Target = S.getConstantTarget()) {
     EmitBranchThroughCleanup(getJumpDestForLabel(Target));
+    PGO.setCurrentRegionCount(0);
     return;
   }
 
@@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoSt
   cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
 
   EmitBranch(IndGotoBB);
+  PGO.setCurrentRegionCount(0);
 }
 
 void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
   // C99 6.8.4.1: The first substatement is executed if the expression compares
   // unequal to 0.  The condition must be a scalar type.
   LexicalScope ConditionScope(*this, S.getSourceRange());
+  RegionCounter Cnt = getPGORegionCounter(&S);
 
   if (S.getConditionVariable())
     EmitAutoVarDecl(*S.getConditionVariable());
@@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const I
     // If the skipped block has no labels in it, just emit the executed block.
     // This avoids emitting dead code and simplifies the CFG substantially.
     if (!ContainsLabel(Skipped)) {
+      if (CondConstant)
+        Cnt.beginRegion(Builder);
       if (Executed) {
         RunCleanupsScope ExecutedScope(*this);
         EmitStmt(Executed);
@@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const I
   llvm::BasicBlock *ElseBlock = ContBlock;
   if (S.getElse())
     ElseBlock = createBasicBlock("if.else");
-  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
+
+  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
 
   // Emit the 'then' code.
-  EmitBlock(ThenBlock); 
+  EmitBlock(ThenBlock);
+  Cnt.beginRegion(Builder);
   {
     RunCleanupsScope ThenScope(*this);
     EmitStmt(S.getThen());
   }
+  Cnt.adjustFallThroughCount();
   EmitBranch(ContBlock);
 
   // Emit the 'else' code if present.
@@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const I
     if (getDebugInfo())
       Builder.SetCurrentDebugLocation(llvm::DebugLoc());
     EmitBlock(ElseBlock);
+    Cnt.beginElseRegion();
     {
       RunCleanupsScope ElseScope(*this);
       EmitStmt(Else);
     }
+    Cnt.adjustFallThroughCount();
     // There is no need to emit line number for unconditional branch.
     if (getDebugInfo())
       Builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const I
 
   // Emit the continuation block for code after the if.
   EmitBlock(ContBlock, true);
+  Cnt.applyAdjustmentsToRegion();
 }
 
 void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
+  RegionCounter Cnt = getPGORegionCounter(&S);
+
   // Emit the header for the loop, which will also become
   // the continue target.
   JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
@@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(cons
   JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
 
   // Store the blocks to use for break and continue.
-  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
+  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt));
 
   // C++ [stmt.while]p2:
   //   When the condition of a while statement is a declaration, the
@@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(cons
   // while(1) is common, avoid extra exit blocks.  Be sure
   // to correctly handle break/continue though.
   bool EmitBoolCondBranch = true;
+  llvm::BranchInst *CondBr = NULL;
   if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
     if (C->isOne())
       EmitBoolCondBranch = false;
@@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(cons
     if (ConditionScope.requiresCleanups())
       ExitBlock = createBasicBlock("while.exit");
 
-    Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
+    CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
 
     if (ExitBlock != LoopExit.getBlock()) {
       EmitBlock(ExitBlock);
@@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(cons
   {
     RunCleanupsScope BodyScope(*this);
     EmitBlock(LoopBody);
+    Cnt.beginRegion(Builder);
     EmitStmt(S.getBody());
   }
+  Cnt.adjustFallThroughCount();
 
   BreakContinueStack.pop_back();
 
+  uint64_t LoopCount = Cnt.getCount();
+  uint64_t ExitCount = Cnt.getLoopExitCount();
+  if (EmitBoolCondBranch)
+    CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+                        PGO.createBranchWeights(LoopCount, ExitCount));
+
   // Immediately force cleanup.
   ConditionScope.ForceCleanup();
 
@@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(cons
 
   // Emit the exit block.
   EmitBlock(LoopExit.getBlock(), true);
+  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
 
   // The LoopHeader typically is just a branch if we skipped emitting
   // a branch, try to erase it.
@@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const D
   JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
   JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
 
+  RegionCounter Cnt = getPGORegionCounter(&S);
+
   // Store the blocks to use for break and continue.
-  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
+  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt));
 
   // Emit the body of the loop.
   llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
   EmitBlock(LoopBody);
+  Cnt.beginRegion(Builder);
   {
     RunCleanupsScope BodyScope(*this);
     EmitStmt(S.getBody());
   }
+  Cnt.adjustFallThroughCount();
 
   BreakContinueStack.pop_back();
 
@@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const D
     if (C->isZero())
       EmitBoolCondBranch = false;
 
+  uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount();
+  uint64_t ExitCount = Cnt.getLoopExitCount();
+
   // As long as the condition is true, iterate the loop.
-  if (EmitBoolCondBranch)
-    Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
+  if (EmitBoolCondBranch) {
+    Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
+                         PGO.createBranchWeights(LoopCount, ExitCount));
+  }
 
   // Emit the exit block.
   EmitBlock(LoopExit.getBlock());
+  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
 
   // The DoCond block typically is just a branch if we skipped
   // emitting a branch, try to erase it.
@@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const D
 }
 
 void CodeGenFunction::EmitForStmt(const ForStmt &S) {
+  RegionCounter Cnt = getPGORegionCounter(&S);
+
   JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
 
   RunCleanupsScope ForScope(*this);
@@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const
   // Create a cleanup scope for the condition variable cleanups.
   RunCleanupsScope ConditionScope(*this);
 
+  llvm::BranchInst *CondBr = NULL;
   if (S.getCond()) {
     // If the for statement has a condition scope, emit the local variable
     // declaration.
@@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const
     // C99 6.8.5p2/p4: The first substatement is executed if the expression
     // compares unequal to 0.  The condition must be a scalar type.
     llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
-    Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
+    CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
 
     if (ExitBlock != LoopExit.getBlock()) {
       EmitBlock(ExitBlock);
@@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const
     // Treat it as a non-zero constant.  Don't even create a new block for the
     // body, just fall into it.
   }
+  Cnt.beginRegion(Builder);
 
   // If the for loop doesn't have an increment we can just use the
   // condition as the continue block.  Otherwise we'll need to create
@@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const
     Continue = getJumpDestInCurrentScope("for.inc");
 
   // Store the blocks to use for break and continue.
-  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
 
   {
     // Create a separate cleanup scope for the body, in case it is not
@@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const
     EmitBlock(Continue.getBlock());
     EmitStmt(S.getInc());
   }
+  Cnt.adjustFallThroughCount();
 
   BreakContinueStack.pop_back();
 
+  uint64_t LoopCount = Cnt.getCount();
+  uint64_t ExitCount = Cnt.getLoopExitCount();
+  if (S.getCond())
+    CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+                        PGO.createBranchWeights(LoopCount, ExitCount));
+
   ConditionScope.ForceCleanup();
   EmitBranch(CondBlock);
 
@@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const
 
   // Emit the fall-through block.
   EmitBlock(LoopExit.getBlock(), true);
+  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
 }
 
 void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
+  RegionCounter Cnt = getPGORegionCounter(&S);
+
   JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
 
   RunCleanupsScope ForScope(*this);
@@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStm
   // The body is executed if the expression, contextually converted
   // to bool, is true.
   llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
-  Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
+  llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal,
+                                                  ForBody, ExitBlock);
 
   if (ExitBlock != LoopExit.getBlock()) {
     EmitBlock(ExitBlock);
@@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStm
   }
 
   EmitBlock(ForBody);
+  Cnt.beginRegion(Builder);
 
   // Create a block for the increment. In case of a 'continue', we jump there.
   JumpDest Continue = getJumpDestInCurrentScope("for.inc");
 
   // Store the blocks to use for break and continue.
-  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
 
   {
     // Create a separate cleanup scope for the loop variable and body.
@@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStm
   // If there is an increment, emit it next.
   EmitBlock(Continue.getBlock());
   EmitStmt(S.getInc());
+  Cnt.adjustFallThroughCount();
 
   BreakContinueStack.pop_back();
 
+  uint64_t LoopCount = Cnt.getCount();
+  uint64_t ExitCount = Cnt.getLoopExitCount();
+  CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+                      PGO.createBranchWeights(LoopCount, ExitCount));
+
   EmitBranch(CondBlock);
 
   ForScope.ForceCleanup();
@@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStm
 
   // Emit the fall-through block.
   EmitBlock(LoopExit.getBlock(), true);
+  PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
 }
 
 void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
@@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue
                        /*init*/ true);
   }
   EmitBranchThroughCleanup(ReturnBlock);
+  PGO.setCurrentRegionCount(0);
 }
 
 /// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
@@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(con
 
   cleanupScope.ForceCleanup();
   EmitBranchThroughCleanup(ReturnBlock);
+  PGO.setCurrentRegionCount(0);
 }
 
 void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
@@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(cons
   if (HaveInsertPoint())
     EmitStopPoint(&S);
 
-  JumpDest Block = BreakContinueStack.back().BreakBlock;
-  EmitBranchThroughCleanup(Block);
+  BreakContinue &BC = BreakContinueStack.back();
+  // We keep track of breaks from the loop so we can differentiate them from
+  // non-local exits in PGO instrumentation. This only applies to loops, not
+  // breaks from switch statements.
+  if (BC.CountBreak)
+    BC.LoopCnt->getBreakCounter().beginRegion(Builder);
+  EmitBranchThroughCleanup(BC.BreakBlock);
+  PGO.setCurrentRegionCount(0);
 }
 
 void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
@@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(c
   if (HaveInsertPoint())
     EmitStopPoint(&S);
 
-  JumpDest Block = BreakContinueStack.back().ContinueBlock;
-  EmitBranchThroughCleanup(Block);
+  BreakContinue &BC = BreakContinueStack.back();
+  // We keep track of continues in the loop so we can differentiate them from
+  // non-local exits in PGO instrumentation.
+  BC.LoopCnt->getContinueCounter().beginRegion(Builder);
+  EmitBranchThroughCleanup(BC.ContinueBlock);
+  PGO.setCurrentRegionCount(0);
 }
 
 /// EmitCaseStmtRange - If case statement range is not too big then
@@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(
   llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
   llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
 
+  RegionCounter CaseCnt = getPGORegionCounter(&S);
+
   // Emit the code for this case. We do this first to make sure it is
   // properly chained from our predecessor before generating the
   // switch machinery to enter this block.
   EmitBlock(createBasicBlock("sw.bb"));
   llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+  CaseCnt.beginRegion(Builder);
   EmitStmt(S.getSubStmt());
 
   // If range is empty, do nothing.
@@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(
   // FIXME: parameters such as this should not be hardcoded.
   if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
     // Range is small enough to add multiple switch instruction cases.
-    for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
+    uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount();
+    unsigned NCases = Range.getZExtValue() + 1;
+    // Divide the weights evenly between the cases, ensuring that the total
+    // weight is preserved. Ie, a weight of 5 over three cases will be
+    // distributed as weights of 2, 2, and 1.
+    uint64_t Weight = Total / NCases, Rem = Total % NCases;
+    for (unsigned I = 0; I != NCases; ++I) {
+      if (SwitchWeights)
+        SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
+      if (Rem)
+        Rem--;
       SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
       LHS++;
     }
@@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(
     Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
   llvm::Value *Cond =
     Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
-  Builder.CreateCondBr(Cond, CaseDest, FalseDest);
+
+  llvm::MDNode *Weights = 0;
+  if (SwitchWeights) {
+    uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount();
+    uint64_t DefaultCount = (*SwitchWeights)[0];
+    Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
+
+    // Since we're chaining the switch default through each large case range, we
+    // need to update the weight for the default, ie, the first case, to include
+    // this case.
+    (*SwitchWeights)[0] += ThisCount;
+  }
+  Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
 
   // Restore the appropriate insertion point.
   if (RestoreBB)
@@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const
     return;
   }
 
+  RegionCounter CaseCnt = getPGORegionCounter(&S);
   llvm::ConstantInt *CaseVal =
     Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
 
-  // If the body of the case is just a 'break', and if there was no fallthrough,
-  // try to not emit an empty block.
-  if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
+  // If the body of the case is just a 'break', try to not emit an empty block.
+  // If we're profiling or we're not optimizing, leave the block in for better
+  // debug and coverage analysis.
+  if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
+      CGM.getCodeGenOpts().OptimizationLevel > 0 &&
       isa<BreakStmt>(S.getSubStmt())) {
     JumpDest Block = BreakContinueStack.back().BreakBlock;
 
     // Only do this optimization if there are no cleanups that need emitting.
     if (isObviouslyBranchWithoutCleanups(Block)) {
+      if (SwitchWeights)
+        SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
       SwitchInsn->addCase(CaseVal, Block.getBlock());
 
       // If there was a fallthrough into this case, make sure to redirect it to
@@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const
 
   EmitBlock(createBasicBlock("sw.bb"));
   llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+  if (SwitchWeights)
+    SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
+  CaseCnt.beginRegion(Builder);
   SwitchInsn->addCase(CaseVal, CaseDest);
 
   // Recursively emitting the statement is acceptable, but is not wonderful for
@@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const
   // Otherwise, iteratively add consecutive cases to this switch stmt.
   while (NextCase && NextCase->getRHS() == 0) {
     CurCase = NextCase;
-    llvm::ConstantInt *CaseVal = 
+    llvm::ConstantInt *CaseVal =
       Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
+
+    CaseCnt = getPGORegionCounter(NextCase);
+    if (SwitchWeights)
+      SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
+    CaseCnt.beginRegion(Builder);
+
     SwitchInsn->addCase(CaseVal, CaseDest);
     NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
   }
@@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(co
   llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
   assert(DefaultBlock->empty() &&
          "EmitDefaultStmt: Default block already defined?");
+
+  llvm::BasicBlock *SkipCountBB = 0;
+  if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
+    // The PGO region here needs to count the number of times the edge occurs,
+    // so fallthrough into this case will jump past the region counter to the
+    // skipcount basic block.
+    SkipCountBB = createBasicBlock("skipcount");
+    EmitBranch(SkipCountBB);
+  }
   EmitBlock(DefaultBlock);
+
+  RegionCounter Cnt = getPGORegionCounter(&S);
+  Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
+
+  if (SkipCountBB)
+    EmitBlock(SkipCountBB);
   EmitStmt(S.getSubStmt());
 }
 
@@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForC
 static bool FindCaseStatementsForValue(const SwitchStmt &S,
                                        const llvm::APSInt &ConstantCondValue,
                                 SmallVectorImpl<const Stmt*> &ResultStmts,
-                                       ASTContext &C) {
+                                       ASTContext &C,
+                                       const SwitchCase *&ResultCase) {
   // First step, find the switch case that is being branched to.  We can do this
   // efficiently by scanning the SwitchCase list.
   const SwitchCase *Case = S.getSwitchCaseList();
@@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(c
   //   while (1) {
   //     case 4: ...
   bool FoundCase = false;
+  ResultCase = Case;
   return CollectStatementsForCase(S.getBody(), Case, FoundCase,
                                   ResultStmts) != CSFC_Failure &&
          FoundCase;
@@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(con
 
   // Handle nested switch statements.
   llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
+  SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
   llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
 
   // See if we can constant fold the condition of the switch and therefore only
@@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(con
   llvm::APSInt ConstantCondValue;
   if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
     SmallVector<const Stmt*, 4> CaseStmts;
+    const SwitchCase *Case = 0;
     if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
-                                   getContext())) {
+                                   getContext(), Case)) {
+      PGO.setCurrentRegionCount(0);
+      if (Case) {
+        RegionCounter CaseCnt = getPGORegionCounter(Case);
+        CaseCnt.beginRegion(Builder);
+      }
       RunCleanupsScope ExecutedScope(*this);
 
       // At this point, we are no longer "within" a switch instance, so
@@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(con
       // specified series of statements and we're good.
       for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
         EmitStmt(CaseStmts[i]);
+      RegionCounter ExitCnt = getPGORegionCounter(&S);
+      ExitCnt.beginRegion(Builder);
 
       // Now we want to restore the saved switch instance so that nested
       // switches continue to function properly
@@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(con
   // failure.
   llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
   SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
+  if (PGO.haveRegionCounts()) {
+    // Walk the SwitchCase list to find how many there are.
+    uint64_t DefaultCount = 0;
+    unsigned NumCases = 0;
+    for (const SwitchCase *Case = S.getSwitchCaseList();
+         Case;
+         Case = Case->getNextSwitchCase()) {
+      if (isa<DefaultStmt>(Case))
+        DefaultCount = getPGORegionCounter(Case).getCount();
+      NumCases += 1;
+    }
+    SwitchWeights = new SmallVector<uint64_t, 16>();
+    SwitchWeights->reserve(NumCases);
+    // The default needs to be first. We store the edge count, so we already
+    // know the right weight.
+    SwitchWeights->push_back(DefaultCount);
+  }
   CaseRangeBlock = DefaultBlock;
 
   // Clear the insertion point to indicate we are in unreachable code.
   Builder.ClearInsertionPoint();
+  PGO.setCurrentRegionCount(0);
 
   // All break statements jump to NextBlock. If BreakContinueStack is non-empty
-  // then reuse last ContinueBlock.
+  // then reuse last ContinueBlock and that block's counter.
   JumpDest OuterContinue;
-  if (!BreakContinueStack.empty())
-    OuterContinue = BreakContinueStack.back().ContinueBlock;
+  RegionCounter *OuterCount = 0;
+  if (!BreakContinueStack.empty()) {
+    BreakContinue &BC = BreakContinueStack.back();
+    OuterContinue = BC.ContinueBlock;
+    OuterCount = BC.LoopCnt;
+  }
 
-  BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
+  BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue,
+                                             OuterCount, /*CountBreak=*/false));
 
   // Emit switch body.
   EmitStmt(S.getBody());
@@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(con
 
   // Emit continuation.
   EmitBlock(SwitchExit.getBlock(), true);
+  RegionCounter ExitCnt = getPGORegionCounter(&S);
+  ExitCnt.beginRegion(Builder);
 
+  if (SwitchWeights) {
+    assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
+           "switch weights do not match switch cases");
+    // If there's only one jump destination there's no sense weighting it.
+    if (SwitchWeights->size() > 1)
+      SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
+                              PGO.createBranchWeights(*SwitchWeights));
+    delete SwitchWeights;
+  }
   SwitchInsn = SavedSwitchInsn;
+  SwitchWeights = SavedSwitchWeights;
   CaseRangeBlock = SavedCRBlock;
 }
 

Modified: cfe/trunk/lib/CodeGen/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CMakeLists.txt?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CMakeLists.txt (original)
+++ cfe/trunk/lib/CodeGen/CMakeLists.txt Mon Jan  6 16:27:43 2014
@@ -53,6 +53,7 @@ add_clang_library(clangCodeGen
   CodeGenModule.cpp
   CodeGenTBAA.cpp
   CodeGenTypes.cpp
+  CodeGenPGO.cpp
   ItaniumCXXABI.cpp
   MicrosoftCXXABI.cpp
   ModuleBuilder.cpp

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp Mon Jan  6 16:27:43 2014
@@ -16,6 +16,7 @@
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
 #include "CodeGenModule.h"
+#include "CodeGenPGO.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
@@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGen
       NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
       ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
       DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
-      SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
+      PGO(cgm), SwitchInsn(0), SwitchWeights(0),
+      CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
       NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
       CXXThisValue(0), CXXDefaultInitExprThis(0),
       CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
@@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(Glob
   if (CGM.getCodeGenOpts().InstrumentForProfiling)
     EmitMCountInstrumentation();
 
+  PGO.assignRegionCounters(GD);
+
   if (RetTy->isVoidType()) {
     // Void type; nothing to return.
     ReturnValue = 0;
@@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(Glob
 
 void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
                                        const Stmt *Body) {
+  RegionCounter Cnt = getPGORegionCounter(Body);
+  Cnt.beginRegion(Builder);
   if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
     EmitCompoundStmtWithoutScope(*S);
   else
@@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(Globa
   // a quick pass now to see if we can.
   if (!CurFn->doesNotThrow())
     TryMarkNoThrow(CurFn);
+
+  PGO.emitWriteoutFunction(CurGD);
+  PGO.destroyRegionCounters();
 }
 
 /// ContainsLabel - Return true if the statement contains a label in it.  If
@@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr
 ///
 void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
                                            llvm::BasicBlock *TrueBlock,
-                                           llvm::BasicBlock *FalseBlock) {
+                                           llvm::BasicBlock *FalseBlock,
+                                           uint64_t TrueCount) {
   Cond = Cond->IgnoreParens();
 
   if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
+    RegionCounter Cnt = getPGORegionCounter(CondBOp);
+
     // Handle X && Y in a condition.
     if (CondBOp->getOpcode() == BO_LAnd) {
       // If we have "1 && X", simplify the code.  "0 && X" would have constant
@@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
       if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
           ConstantBool) {
         // br(1 && X) -> br(X).
-        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+        Cnt.beginRegion(Builder);
+        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+                                    TrueCount);
       }
 
       // If we have "X && 1", simplify the code to use an uncond branch.
@@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolEx
       if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
           ConstantBool) {
         // br(X && 1) -> br(X).
-        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+                                    TrueCount);
       }
 
       // Emit the LHS as a conditional.  If the LHS conditional is false, we
       // want to jump to the FalseBlock.
       llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
+      // The counter tells us how often we evaluate RHS, and all of TrueCount
+      // can be propagated to that branch.
+      uint64_t RHSCount = Cnt.getCount();
 
       ConditionalEvaluation eval(*this);
-      EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
+      EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
       EmitBlock(LHSTrue);
 
       // Any temporaries created here are conditional.
+      Cnt.beginRegion(Builder);
       eval.begin(*this);
-      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
       eval.end(*this);
+      Cnt.adjustFallThroughCount();
+      Cnt.applyAdjustmentsToRegion();
 
       return;
     }
@@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolEx
       if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
           !ConstantBool) {
         // br(0 || X) -> br(X).
-        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+        Cnt.beginRegion(Builder);
+        return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+                                    TrueCount);
       }
 
       // If we have "X || 0", simplify the code to use an uncond branch.
@@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolEx
       if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
           !ConstantBool) {
         // br(X || 0) -> br(X).
-        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+        return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+                                    TrueCount);
       }
 
       // Emit the LHS as a conditional.  If the LHS conditional is true, we
       // want to jump to the TrueBlock.
       llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
+      // We have the count for entry to the RHS and for the whole expression
+      // being true, so we can divy up True count between the short circuit and
+      // the RHS.
+      uint64_t LHSCount = TrueCount - Cnt.getCount();
+      uint64_t RHSCount = TrueCount - LHSCount;
 
       ConditionalEvaluation eval(*this);
-      EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
+      EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
       EmitBlock(LHSFalse);
 
       // Any temporaries created here are conditional.
+      Cnt.beginRegion(Builder);
       eval.begin(*this);
-      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+      EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
+
       eval.end(*this);
+      Cnt.adjustFallThroughCount();
+      Cnt.applyAdjustmentsToRegion();
 
       return;
     }
@@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolEx
 
   if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
     // br(!x, t, f) -> br(x, f, t)
-    if (CondUOp->getOpcode() == UO_LNot)
-      return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
+    if (CondUOp->getOpcode() == UO_LNot) {
+      // Negate the count.
+      uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
+      // Negate the condition and swap the destination blocks.
+      return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
+                                  FalseCount);
+    }
   }
 
   if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
@@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolEx
     llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
     llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
 
+    RegionCounter Cnt = getPGORegionCounter(CondOp);
     ConditionalEvaluation cond(*this);
-    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
+    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+
+    // When computing PGO branch weights, we only know the overall count for
+    // the true block. This code is essentially doing tail duplication of the
+    // naive code-gen, introducing new edges for which counts are not
+    // available. Divide the counts proportionally between the LHS and RHS of
+    // the conditional operator.
+    uint64_t LHSScaledTrueCount = 0;
+    if (TrueCount) {
+      double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount();
+      LHSScaledTrueCount = TrueCount * LHSRatio;
+    }
 
     cond.begin(*this);
     EmitBlock(LHSBlock);
-    EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
+    Cnt.beginRegion(Builder);
+    EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
+                         LHSScaledTrueCount);
     cond.end(*this);
 
     cond.begin(*this);
     EmitBlock(RHSBlock);
-    EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
+    Cnt.beginElseRegion();
+    EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
+                         TrueCount - LHSScaledTrueCount);
     cond.end(*this);
 
     return;
@@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolEx
     return;
   }
 
+  // Create branch weights based on the number of times we get here and the
+  // number of times the condition should be true.
+  uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount);
+  llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
+                                                  CurrentCount - TrueCount);
+
   // Emit the code with the fully general case.
   llvm::Value *CondV = EvaluateExprAsBool(Cond);
-  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
+  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
 }
 
 /// ErrorUnsupported - Print out an error that codegen doesn't support the

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Jan  6 16:27:43 2014
@@ -19,6 +19,7 @@
 #include "CGValue.h"
 #include "EHScopeStack.h"
 #include "CodeGenModule.h"
+#include "CodeGenPGO.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
@@ -817,19 +818,36 @@ private:
   llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
 
   // BreakContinueStack - This keeps track of where break and continue
-  // statements should jump to.
+  // statements should jump to and the associated base counter for
+  // instrumentation.
   struct BreakContinue {
-    BreakContinue(JumpDest Break, JumpDest Continue)
-      : BreakBlock(Break), ContinueBlock(Continue) {}
+    BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt,
+                  bool CountBreak = true)
+      : BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt),
+        CountBreak(CountBreak) {}
 
     JumpDest BreakBlock;
     JumpDest ContinueBlock;
+    RegionCounter *LoopCnt;
+    bool CountBreak;
   };
   SmallVector<BreakContinue, 8> BreakContinueStack;
 
+  CodeGenPGO PGO;
+
+public:
+  /// Get a counter for instrumentation of the region associated with the given
+  /// statement.
+  RegionCounter getPGORegionCounter(const Stmt *S) {
+    return RegionCounter(PGO, S);
+  }
+private:
+
   /// SwitchInsn - This is nearest current switch instruction. It is null if
   /// current context is not in a switch.
   llvm::SwitchInst *SwitchInsn;
+  /// The branch weights of SwitchInsn when doing instrumentation based PGO.
+  SmallVector<uint64_t, 16> *SwitchWeights;
 
   /// CaseRangeBlock - This block holds if condition check for last case
   /// statement range in current switch instruction.
@@ -2413,8 +2431,10 @@ public:
   /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
   /// if statement) to the specified blocks.  Based on the condition, this might
   /// try to simplify the codegen of the conditional based on the branch.
+  /// TrueCount should be the number of times we expect the condition to
+  /// evaluate to true based on PGO data.
   void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
-                            llvm::BasicBlock *FalseBlock);
+                            llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
 
   /// \brief Emit a description of a type in a format suitable for passing to
   /// a runtime sanitizer handler.

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jan  6 16:27:43 2014
@@ -20,6 +20,7 @@
 #include "CGOpenCLRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenTBAA.h"
+#include "CodeGenPGO.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
@@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext
       ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
       TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
       OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
-      NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
+      NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
+      CFConstantStringClassRef(0),
       ConstantStringClassRef(0), NSConstantStringType(0),
       NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
       BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
@@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext
   if (C.getLangOpts().ObjCAutoRefCount)
     ARCData = new ARCEntrypoints();
   RRData = new RREntrypoints();
+
+  if (!CodeGenOpts.InstrProfileInput.empty())
+    PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
 }
 
 CodeGenModule::~CodeGenModule() {
@@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDe
     AddGlobalDtor(Fn, DA->getPriority());
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, Fn);
+
+  llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
+  if (PGOInit)
+    AddGlobalCtor(PGOInit, 0);
 }
 
 void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=198640&r1=198639&r2=198640&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.h Mon Jan  6 16:27:43 2014
@@ -85,7 +85,8 @@ namespace CodeGen {
   class CGCUDARuntime;
   class BlockFieldFlags;
   class FunctionArgList;
-  
+  class PGOProfileData;
+
   struct OrderGlobalInits {
     unsigned int priority;
     unsigned int lex_order;
@@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenType
   ARCEntrypoints *ARCData;
   llvm::MDNode *NoObjCARCExceptionsMetadata;
   RREntrypoints *RRData;
+  PGOProfileData *PGOData;
 
   // WeakRefReferences - A set of references that have only been seen via
   // a weakref so far. This is used to remove the weak of the reference if we
@@ -479,6 +481,10 @@ public:
     return *RRData;
   }
 
+  PGOProfileData *getPGOData() const {
+    return PGOData;
+  }
+
   llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
     return StaticLocalDeclMap[D];
   }

Added: cfe/trunk/lib/CodeGen/CodeGenPGO.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.cpp?rev=198640&view=auto
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenPGO.cpp (added)
+++ cfe/trunk/lib/CodeGen/CodeGenPGO.cpp Mon Jan  6 16:27:43 2014
@@ -0,0 +1,456 @@
+//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenPGO.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/StmtVisitor.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/FileSystem.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
+  DiagnosticsEngine &Diags = CGM.getDiags();
+  unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
+  Diags.Report(DiagID);
+}
+
+PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
+  : CGM(CGM) {
+  if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
+    ReportBadPGOData(CGM, "failed to open pgo data file");
+    return;
+  }
+
+  if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
+    ReportBadPGOData(CGM, "pgo data file too big");
+    return;
+  }
+
+  // Scan through the data file and map each function to the corresponding
+  // file offset where its counts are stored.
+  const char *BufferStart = DataBuffer->getBufferStart();
+  const char *BufferEnd = DataBuffer->getBufferEnd();
+  const char *CurPtr = BufferStart;
+  while (CurPtr < BufferEnd) {
+    // Read the mangled function name.
+    const char *FuncName = CurPtr;
+    // FIXME: Something will need to be added to distinguish static functions.
+    CurPtr = strchr(CurPtr, ' ');
+    if (!CurPtr) {
+      ReportBadPGOData(CGM, "pgo data file has malformed function entry");
+      return;
+    }
+    StringRef MangledName(FuncName, CurPtr - FuncName);
+
+    // Read the number of counters.
+    char *EndPtr;
+    unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+    if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
+      ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
+      return;
+    }
+    CurPtr = EndPtr;
+
+    // There is one line for each counter; skip over those lines.
+    for (unsigned N = 0; N < NumCounters; ++N) {
+      CurPtr = strchr(++CurPtr, '\n');
+      if (!CurPtr) {
+        ReportBadPGOData(CGM, "pgo data file is missing some counter info");
+        return;
+      }
+    }
+
+    // Skip over the blank line separating functions.
+    CurPtr += 2;
+
+    DataOffsets[MangledName] = FuncName - BufferStart;
+  }
+}
+
+bool PGOProfileData::getFunctionCounts(StringRef MangledName,
+                                       std::vector<uint64_t> &Counts) {
+  // Find the relevant section of the pgo-data file.
+  llvm::StringMap<unsigned>::const_iterator OffsetIter =
+    DataOffsets.find(MangledName);
+  if (OffsetIter == DataOffsets.end())
+    return true;
+  const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
+
+  // Skip over the function name.
+  CurPtr = strchr(CurPtr, ' ');
+  assert(CurPtr && "pgo-data has corrupted function entry");
+
+  // Read the number of counters.
+  char *EndPtr;
+  unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+  assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
+         "pgo-data file has corrupted number of counters");
+  CurPtr = EndPtr;
+
+  Counts.reserve(NumCounters);
+
+  for (unsigned N = 0; N < NumCounters; ++N) {
+    // Read the count value.
+    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
+    if (EndPtr == CurPtr || *EndPtr != '\n') {
+      ReportBadPGOData(CGM, "pgo-data file has bad count value");
+      return true;
+    }
+    Counts.push_back(Count);
+    CurPtr = EndPtr + 1;
+  }
+
+  // Make sure the number of counters matches up.
+  if (Counts.size() != NumCounters) {
+    ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
+    return true;
+  }
+
+  return false;
+}
+
+void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
+  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+    return;
+
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+
+  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
+  llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
+
+  llvm::Function *WriteoutF =
+    CGM.getModule().getFunction("__llvm_pgo_writeout");
+  if (!WriteoutF) {
+    llvm::FunctionType *WriteoutFTy =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
+    WriteoutF = llvm::Function::Create(WriteoutFTy,
+                                       llvm::GlobalValue::InternalLinkage,
+                                       "__llvm_pgo_writeout", &CGM.getModule());
+  }
+  WriteoutF->setUnnamedAddr(true);
+  WriteoutF->addFnAttr(llvm::Attribute::NoInline);
+  if (CGM.getCodeGenOpts().DisableRedZone)
+    WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);
+
+  llvm::BasicBlock *BB = WriteoutF->empty() ?
+    llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();
+
+  CGBuilderTy PGOBuilder(BB);
+
+  llvm::Instruction *I = BB->getTerminator();
+  if (!I)
+    I = PGOBuilder.CreateRetVoid();
+  PGOBuilder.SetInsertPoint(I);
+
+  llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
+  llvm::Type *Args[] = {
+    Int8PtrTy,                       // const char *MangledName
+    Int32Ty,                         // uint32_t NumCounters
+    Int64PtrTy                       // uint64_t *Counters
+  };
+  llvm::FunctionType *FTy =
+    llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
+  llvm::Constant *EmitFunc =
+    CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);
+
+  llvm::Constant *MangledName =
+    CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
+  MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
+  PGOBuilder.CreateCall3(EmitFunc, MangledName,
+                         PGOBuilder.getInt32(NumRegionCounters),
+                         PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
+}
+
+llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
+  llvm::Function *WriteoutF =
+    CGM.getModule().getFunction("__llvm_pgo_writeout");
+  if (!WriteoutF)
+    return NULL;
+
+  // Create a small bit of code that registers the "__llvm_pgo_writeout" to
+  // be executed at exit.
+  llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
+  if (F)
+    return NULL;
+
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+  llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
+                                                    false);
+  F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
+                             "__llvm_pgo_init", &CGM.getModule());
+  F->setUnnamedAddr(true);
+  F->setLinkage(llvm::GlobalValue::InternalLinkage);
+  F->addFnAttr(llvm::Attribute::NoInline);
+  if (CGM.getCodeGenOpts().DisableRedZone)
+    F->addFnAttr(llvm::Attribute::NoRedZone);
+
+  llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
+  CGBuilderTy PGOBuilder(BB);
+
+  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
+  llvm::Type *Params[] = {
+    llvm::PointerType::get(FTy, 0)
+  };
+  FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);
+
+  // Inialize the environment and register the local writeout function.
+  llvm::Constant *PGOInit =
+    CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
+  PGOBuilder.CreateCall(PGOInit, WriteoutF);
+  PGOBuilder.CreateRetVoid();
+
+  return F;
+}
+
+namespace {
+  /// A StmtVisitor that fills a map of statements to PGO counters.
+  struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
+    /// The next counter value to assign.
+    unsigned NextCounter;
+    /// The map of statements to counters.
+    llvm::DenseMap<const Stmt*, unsigned> *CounterMap;
+
+    MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
+      NextCounter(0), CounterMap(CounterMap) {
+    }
+
+    void VisitChildren(const Stmt *S) {
+      for (Stmt::const_child_range I = S->children(); I; ++I)
+        if (*I)
+         this->Visit(*I);
+    }
+    void VisitStmt(const Stmt *S) { VisitChildren(S); }
+
+    /// Assign a counter to track entry to the function body
+    void VisitFunctionDecl(const FunctionDecl *S) {
+      (*CounterMap)[S->getBody()] = NextCounter++;
+      Visit(S->getBody());
+    }
+    /// Assign a counter to track the block following a label
+    void VisitLabelStmt(const LabelStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getSubStmt());
+    }
+    /// Assign three counters - one for the body of the loop, one for breaks
+    /// from the loop, and one for continues.
+    ///
+    /// The break and continue counters cover all such statements in this loop,
+    /// and are used in calculations to find the number of times the condition
+    /// and exit of the loop occur. They are needed so we can differentiate
+    /// these statements from non-local exits like return and goto.
+    void VisitWhileStmt(const WhileStmt *S) {
+      (*CounterMap)[S] = NextCounter;
+      NextCounter += 3;
+      Visit(S->getCond());
+      Visit(S->getBody());
+    }
+    /// Assign counters for the body of the loop, and for breaks and
+    /// continues. See VisitWhileStmt.
+    void VisitDoStmt(const DoStmt *S) {
+      (*CounterMap)[S] = NextCounter;
+      NextCounter += 3;
+      Visit(S->getBody());
+      Visit(S->getCond());
+    }
+    /// Assign counters for the body of the loop, and for breaks and
+    /// continues. See VisitWhileStmt.
+    void VisitForStmt(const ForStmt *S) {
+      (*CounterMap)[S] = NextCounter;
+      NextCounter += 3;
+      const Expr *E;
+      if ((E = S->getCond()))
+        Visit(E);
+      Visit(S->getBody());
+      if ((E = S->getInc()))
+        Visit(E);
+    }
+    /// Assign counters for the body of the loop, and for breaks and
+    /// continues. See VisitWhileStmt.
+    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+      (*CounterMap)[S] = NextCounter;
+      NextCounter += 3;
+      const Expr *E;
+      if ((E = S->getCond()))
+        Visit(E);
+      Visit(S->getBody());
+      if ((E = S->getInc()))
+        Visit(E);
+    }
+    /// Assign counters for the body of the loop, and for breaks and
+    /// continues. See VisitWhileStmt.
+    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+      (*CounterMap)[S] = NextCounter;
+      NextCounter += 3;
+      Visit(S->getElement());
+      Visit(S->getBody());
+    }
+    /// Assign a counter for the exit block of the switch statement.
+    void VisitSwitchStmt(const SwitchStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getCond());
+      Visit(S->getBody());
+    }
+    /// Assign a counter for a particular case in a switch. This counts jumps
+    /// from the switch header as well as fallthrough from the case before this
+    /// one.
+    void VisitCaseStmt(const CaseStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getSubStmt());
+    }
+    /// Assign a counter for the default case of a switch statement. The count
+    /// is the number of branches from the loop header to the default, and does
+    /// not include fallthrough from previous cases. If we have multiple
+    /// conditional branch blocks from the switch instruction to the default
+    /// block, as with large GNU case ranges, this is the counter for the last
+    /// edge in that series, rather than the first.
+    void VisitDefaultStmt(const DefaultStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getSubStmt());
+    }
+    /// Assign a counter for the "then" part of an if statement. The count for
+    /// the "else" part, if it exists, will be calculated from this counter.
+    void VisitIfStmt(const IfStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getCond());
+      Visit(S->getThen());
+      if (S->getElse())
+        Visit(S->getElse());
+    }
+    /// Assign a counter for the continuation block of a C++ try statement.
+    void VisitCXXTryStmt(const CXXTryStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getTryBlock());
+      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
+        Visit(S->getHandler(I));
+    }
+    /// Assign a counter for a catch statement's handler block.
+    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
+      (*CounterMap)[S] = NextCounter++;
+      Visit(S->getHandlerBlock());
+    }
+    /// Assign a counter for the "true" part of a conditional operator. The
+    /// count in the "false" part will be calculated from this counter.
+    void VisitConditionalOperator(const ConditionalOperator *E) {
+      (*CounterMap)[E] = NextCounter++;
+      Visit(E->getCond());
+      Visit(E->getTrueExpr());
+      Visit(E->getFalseExpr());
+    }
+    /// Assign a counter for the right hand side of a logical and operator.
+    void VisitBinLAnd(const BinaryOperator *E) {
+      (*CounterMap)[E] = NextCounter++;
+      Visit(E->getLHS());
+      Visit(E->getRHS());
+    }
+    /// Assign a counter for the right hand side of a logical or operator.
+    void VisitBinLOr(const BinaryOperator *E) {
+      (*CounterMap)[E] = NextCounter++;
+      Visit(E->getLHS());
+      Visit(E->getRHS());
+    }
+  };
+}
+
+void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
+  bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
+  PGOProfileData *PGOData = CGM.getPGOData();
+  if (!InstrumentRegions && !PGOData)
+    return;
+  const Decl *D = GD.getDecl();
+  if (!D)
+    return;
+  mapRegionCounters(D);
+  if (InstrumentRegions)
+    emitCounterVariables();
+  if (PGOData)
+    loadRegionCounts(GD, PGOData);
+}
+
+void CodeGenPGO::mapRegionCounters(const Decl *D) {
+  RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
+  MapRegionCounters Walker(RegionCounterMap);
+  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+    Walker.VisitFunctionDecl(FD);
+  NumRegionCounters = Walker.NextCounter;
+}
+
+void CodeGenPGO::emitCounterVariables() {
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
+                                                    NumRegionCounters);
+  RegionCounters =
+    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
+                             llvm::GlobalVariable::PrivateLinkage,
+                             llvm::Constant::getNullValue(CounterTy),
+                             "__llvm_pgo_ctr");
+}
+
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
+  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+    return;
+  llvm::Value *Addr =
+    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
+  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
+  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+  Builder.CreateStore(Count, Addr);
+}
+
+void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
+  // For now, ignore the counts from the PGO data file only if the number of
+  // counters does not match. This could be tightened down in the future to
+  // ignore counts when the input changes in various ways, e.g., by comparing a
+  // hash value based on some characteristics of the input.
+  RegionCounts = new std::vector<uint64_t>();
+  if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
+      RegionCounts->size() != NumRegionCounters) {
+    delete RegionCounts;
+    RegionCounts = 0;
+  }
+}
+
+void CodeGenPGO::destroyRegionCounters() {
+  if (RegionCounterMap != 0)
+    delete RegionCounterMap;
+  if (RegionCounts != 0)
+    delete RegionCounts;
+}
+
+llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
+                                              uint64_t FalseCount) {
+  if (!TrueCount && !FalseCount)
+    return 0;
+
+  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+  // TODO: need to scale down to 32-bits
+  // According to Laplace's Rule of Succession, it is better to compute the
+  // weight based on the count plus 1.
+  return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
+}
+
+llvm::MDNode *
+CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
+  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+  // TODO: need to scale down to 32-bits, instead of just truncating.
+  // According to Laplace's Rule of Succession, it is better to compute the
+  // weight based on the count plus 1.
+  SmallVector<uint32_t, 16> ScaledWeights;
+  ScaledWeights.reserve(Weights.size());
+  for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
+       WI != WE; ++WI) {
+    ScaledWeights.push_back(*WI + 1);
+  }
+  return MDHelper.createBranchWeights(ScaledWeights);
+}

Added: cfe/trunk/lib/CodeGen/CodeGenPGO.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenPGO.h?rev=198640&view=auto
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenPGO.h (added)
+++ cfe/trunk/lib/CodeGen/CodeGenPGO.h Mon Jan  6 16:27:43 2014
@@ -0,0 +1,216 @@
+//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CODEGEN_CODEGENPGO_H
+#define CLANG_CODEGEN_CODEGENPGO_H
+
+#include "CGBuilder.h"
+#include "CodeGenModule.h"
+#include "CodeGenTypes.h"
+#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace clang {
+namespace CodeGen {
+class RegionCounter;
+
+/// The raw counter data from an instrumented PGO binary
+class PGOProfileData {
+private:
+  /// The PGO data
+  llvm::OwningPtr<llvm::MemoryBuffer> DataBuffer;
+  /// Offsets into DataBuffer for each function's counters
+  llvm::StringMap<unsigned> DataOffsets;
+  CodeGenModule &CGM;
+public:
+  PGOProfileData(CodeGenModule &CGM, std::string Path);
+  /// Fill Counts with the profile data for the given function name. Returns
+  /// false on success.
+  bool getFunctionCounts(StringRef MangledName, std::vector<uint64_t> &Counts);
+};
+
+/// Per-function PGO state. This class should generally not be used directly,
+/// but instead through the CodeGenFunction and RegionCounter types.
+class CodeGenPGO {
+private:
+  CodeGenModule &CGM;
+
+  unsigned NumRegionCounters;
+  llvm::GlobalVariable *RegionCounters;
+  llvm::DenseMap<const Stmt*, unsigned> *RegionCounterMap;
+  std::vector<uint64_t> *RegionCounts;
+  uint64_t CurrentRegionCount;
+
+public:
+  CodeGenPGO(CodeGenModule &CGM)
+    : CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0),
+      RegionCounts(0), CurrentRegionCount(0) {}
+  ~CodeGenPGO() {}
+
+  /// Whether or not we have PGO region data for the current function. This is
+  /// false both when we have no data at all and when our data has been
+  /// discarded.
+  bool haveRegionCounts() const { return RegionCounts != 0; }
+
+  /// Return the counter value of the current region.
+  uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
+  /// Return the counter value of the current region, or \p Min if it is larger.
+  uint64_t getCurrentRegionCountWithMin(uint64_t Min) {
+    return std::max(Min, CurrentRegionCount);
+  }
+  /// Set the counter value for the current region. This is used to keep track
+  /// of changes to the most recent counter from control flow and non-local
+  /// exits.
+  void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
+
+  /// Calculate branch weights appropriate for PGO data
+  llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
+  llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
+
+  /// Assign counters to regions and configure them for PGO of a given
+  /// function. Does nothing if instrumentation is not enabled and either
+  /// generates global variables or associates PGO data with each of the
+  /// counters depending on whether we are generating or using instrumentation.
+  void assignRegionCounters(GlobalDecl &GD);
+  /// Emit code to write counts for a given function to disk, if necessary.
+  void emitWriteoutFunction(GlobalDecl &GD);
+  /// Clean up region counter state. Must be called if assignRegionCounters is
+  /// used.
+  void destroyRegionCounters();
+  /// Emit the logic to register region counter write out functions. Returns a
+  /// function that implements this logic.
+  static llvm::Function *emitInitialization(CodeGenModule &CGM);
+
+private:
+  void mapRegionCounters(const Decl *D);
+  void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData);
+  void emitCounterVariables();
+
+  /// Emit code to increment the counter at the given index
+  void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
+
+  /// Return the region counter for the given statement. This should only be
+  /// called on statements that have a dedicated counter.
+  unsigned getRegionCounter(const Stmt *S) {
+    if (RegionCounterMap == 0)
+      return 0;
+    return (*RegionCounterMap)[S];
+  }
+
+  /// Return the region count for the counter at the given index.
+  uint64_t getRegionCount(unsigned Counter) {
+    if (!haveRegionCounts())
+      return 0;
+    return (*RegionCounts)[Counter];
+  }
+
+  friend class RegionCounter;
+};
+
+/// A counter for a particular region. This is the primary interface through
+/// which clients manage PGO counters and their values.
+class RegionCounter {
+  CodeGenPGO *PGO;
+  unsigned Counter;
+  uint64_t Count;
+  uint64_t ParentCount;
+  uint64_t RegionCount;
+  int64_t Adjust;
+
+  RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
+    : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
+      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+public:
+  RegionCounter(CodeGenPGO &PGO, const Stmt *S)
+    : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
+      Count(PGO.getRegionCount(Counter)),
+      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+  /// Get the value of the counter. In most cases this is the number of times
+  /// the region of the counter was entered, but for switch labels it's the
+  /// number of direct jumps to that label.
+  uint64_t getCount() const { return Count; }
+  /// Get the value of the counter with adjustments applied. Adjustments occur
+  /// when control enters or leaves the region abnormally, ie, if there is a
+  /// jump to a label within the region, or if the function can return from
+  /// within the region. The adjusted count, then, is the value of the counter
+  /// at the end of the region.
+  uint64_t getAdjustedCount() const {
+    assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count");
+    return Count + Adjust;
+  }
+  /// Get the value of the counter in this region's parent, ie, the region that
+  /// was active when this region began. This is useful for deriving counts in
+  /// implicitly counted regions, like the false case of a condition or the
+  /// normal exits of a loop.
+  uint64_t getParentCount() const { return ParentCount; }
+
+  /// Get the number of times the condition of a loop will evaluate false. This
+  /// is the number of times we enter the loop, adjusted by the difference
+  /// between entering and exiting the loop body normally, excepting that
+  /// 'continue' statements also bring us back here.
+  ///
+  /// Undefined if this counter is not counting a loop.
+  uint64_t getLoopExitCount() const {
+    return getParentCount() + getContinueCounter().getCount() +
+      getAdjustedCount() - getCount();
+  }
+  /// Get the associated break counter. Undefined if this counter is not
+  /// counting a loop.
+  RegionCounter getBreakCounter() const {
+    return RegionCounter(*PGO, Counter + 1);
+  }
+  /// Get the associated continue counter. Undefined if this counter is not
+  /// counting a loop.
+  RegionCounter getContinueCounter() const {
+    return RegionCounter(*PGO, Counter + 2);
+  }
+
+  /// Activate the counter by emitting an increment and starting to track
+  /// adjustments. If AddIncomingFallThrough is true, the current region count
+  /// will be added to the counter for the purposes of tracking the region.
+  void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
+    RegionCount = Count;
+    if (AddIncomingFallThrough)
+      RegionCount += PGO->getCurrentRegionCount();
+    PGO->setCurrentRegionCount(RegionCount);
+    PGO->emitCounterIncrement(Builder, Counter);
+  }
+  /// For counters on boolean branches, begins tracking adjustments for the
+  /// uncounted path.
+  void beginElseRegion() {
+    RegionCount = ParentCount - Count;
+    PGO->setCurrentRegionCount(RegionCount);
+  }
+
+  /// Control may either enter or leave the region, so the count at the end may
+  /// be different from the start. Call this to track that adjustment without
+  /// modifying the current count. Must not be called before one of beginRegion
+  /// or beginElseRegion.
+  void adjustFallThroughCount() {
+    Adjust += PGO->getCurrentRegionCount() - RegionCount;
+  }
+  /// Commit all adjustments to the current region. This should be called after
+  /// all blocks that adjust the fallthrough count have been emitted.
+  void applyAdjustmentsToRegion() {
+    PGO->setCurrentRegionCount(ParentCount + Adjust);
+  }
+};
+
+}  // end namespace CodeGen
+}  // end namespace clang
+
+#endif

Added: cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata?rev=198640&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata (added)
+++ cfe/trunk/test/CodeGen/Inputs/instr-profile.pgodata Mon Jan  6 16:27:43 2014
@@ -0,0 +1,127 @@
+simple_loops 10
+1
+100
+0
+0
+100
+0
+0
+76
+0
+0
+
+conditionals 13
+1
+100
+0
+0
+50
+50
+33
+33
+16
+99
+100
+99
+100
+
+early_exits 13
+1
+0
+51
+1
+25
+1
+25
+1
+26
+0
+0
+1
+0
+
+jumps 30
+1
+1
+0
+0
+0
+1
+0
+0
+0
+0
+1
+0
+1
+2
+3
+2
+0
+0
+0
+3
+0
+1
+1
+1
+10
+0
+0
+0
+10
+9
+
+switches 21
+1
+1
+1
+15
+0
+7
+7
+1
+0
+3
+2
+3
+3
+4
+4
+0
+4
+4
+5
+1
+0
+
+big_switch 19
+1
+32
+0
+0
+32
+1
+0
+2
+1
+11
+11
+1
+1
+15
+15
+1
+1
+2
+2
+
+no_usable_data 5
+1
+1
+1
+1
+1
+
+main 1
+1

Added: cfe/trunk/test/CodeGen/instr-profile.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/instr-profile.c?rev=198640&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/instr-profile.c (added)
+++ cfe/trunk/test/CodeGen/instr-profile.c Mon Jan  6 16:27:43 2014
@@ -0,0 +1,428 @@
+// Test that instrumentation based profiling feeds branch prediction
+// correctly. This tests both generation of profile data and use of the same,
+// and the input file for the -fprofile-instr-use case is expected to be result
+// of running the program generated by the -fprofile-instr-generate case
+// (excepting no_usable_data). As such, main() should call every function in
+// this test.
+
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
+
+// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer
+// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
+// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
+// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer
+// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer
+// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer
+// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64]  zeroinitializer
+// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64]  zeroinitializer
+
+// PGOGEN-LABEL: @simple_loops()
+// PGOUSE-LABEL: @simple_loops()
+// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0
+void simple_loops() {
+  int i;
+  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]]
+  for (i = 0; i < 100; ++i) {
+  }
+  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4
+  // PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]]
+  while (i > 0)
+    i--;
+  // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7
+  // PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]]
+  do {} while (i++ < 75);
+
+  // PGOGEN-NOT: store {{.*}} @[[SLC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @conditionals()
+// PGOUSE-LABEL: @conditionals()
+// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 0
+void conditionals() {
+  // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[IF1:[0-9]+]]
+  for (int i = 0; i < 100; ++i) {
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 4
+    // PGOUSE: br {{.*}} !prof ![[IF2:[0-9]+]]
+    if (i % 2) {
+      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 5
+      // PGOUSE: br {{.*}} !prof ![[IF3:[0-9]+]]
+      if (i) {}
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 6
+    // PGOUSE: br {{.*}} !prof ![[IF4:[0-9]+]]
+    } else if (i % 3) {
+      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 7
+      // PGOUSE: br {{.*}} !prof ![[IF5:[0-9]+]]
+      if (i) {}
+    } else {
+      // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 8
+      // PGOUSE: br {{.*}} !prof ![[IF6:[0-9]+]]
+      if (i) {}
+    }
+
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 10
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 9
+    // PGOUSE: br {{.*}} !prof ![[IF7:[0-9]+]]
+    if (1 && i) {}
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 12
+    // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 11
+    // PGOUSE: br {{.*}} !prof ![[IF8:[0-9]+]]
+    if (0 || i) {}
+  }
+
+  // PGOGEN-NOT: store {{.*}} @[EEC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @early_exits()
+// PGOUSE-LABEL: @early_exits()
+// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 0
+void early_exits() {
+  int i = 0;
+
+  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[EE1:[0-9]+]]
+  if (i) {}
+
+  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 2
+  // PGOUSE: br {{.*}} !prof ![[EE2:[0-9]+]]
+  while (i < 100) {
+    i++;
+    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 5
+    // PGOUSE: br {{.*}} !prof ![[EE3:[0-9]+]]
+    if (i > 50)
+      // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 3
+      break;
+    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 6
+    // PGOUSE: br {{.*}} !prof ![[EE4:[0-9]+]]
+    if (i % 2)
+      // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 4
+      continue;
+  }
+
+  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 7
+  // PGOUSE: br {{.*}} !prof ![[EE5:[0-9]+]]
+  if (i) {}
+
+  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 8
+  do {
+    // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 11
+    // PGOUSE: br {{.*}} !prof ![[EE6:[0-9]+]]
+    if (i > 75)
+      return;
+    else
+      i++;
+  // PGOUSE: br {{.*}} !prof ![[EE7:[0-9]+]]
+  } while (i < 100);
+
+  // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 12
+  // Never reached -> no weights
+  if (i) {}
+
+  // PGOGEN-NOT: store {{.*}} @[[EEC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @jumps()
+// PGOUSE-LABEL: @jumps()
+// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 0
+void jumps() {
+  int i;
+
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[JM1:[0-9]+]]
+  for (i = 0; i < 2; ++i) {
+    goto outofloop;
+    // Never reached -> no weights
+    if (i) {}
+  }
+// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 5
+outofloop:
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 6
+  // PGOUSE: br {{.*}} !prof ![[JM2:[0-9]+]]
+  if (i) {}
+
+  goto loop1;
+
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 7
+  // PGOUSE: br {{.*}} !prof ![[JM3:[0-9]+]]
+  while (i) {
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 10
+  loop1:
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 11
+    // PGOUSE: br {{.*}} !prof ![[JM4:[0-9]+]]
+    if (i) {}
+  }
+
+  goto loop2;
+// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 12
+first:
+// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 13
+second:
+// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 14
+third:
+  i++;
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 15
+  // PGOUSE: br {{.*}} !prof ![[JM5:[0-9]+]]
+  if (i < 3)
+    goto loop2;
+
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 16
+  // PGOUSE: br {{.*}} !prof ![[JM6:[0-9]+]]
+  while (i < 3) {
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 19
+  loop2:
+    // PGOUSE: switch {{.*}} [
+    // PGOUSE: ], !prof ![[JM7:[0-9]+]]
+    switch (i) {
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 21
+    case 0:
+      goto first;
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 22
+    case 1:
+      goto second;
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 23
+    case 2:
+      goto third;
+    }
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 20
+  }
+
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 24
+  // PGOUSE: br {{.*}} !prof ![[JM8:[0-9]+]]
+  for (i = 0; i < 10; ++i) {
+    goto withinloop;
+    // never reached -> no weights
+    if (i) {}
+  // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 28
+  withinloop:
+    // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 29
+    // PGOUSE: br {{.*}} !prof ![[JM9:[0-9]+]]
+    if (i) {}
+  }
+
+  // PGOGEN-NOT: store {{.*}} @[[JMC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @switches()
+// PGOUSE-LABEL: @switches()
+// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 0
+void switches() {
+  static int weights[] = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5};
+
+  // No cases -> no weights
+  switch (weights[0]) {
+  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 2
+  default:
+    break;
+  }
+  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 1
+
+  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 3
+  // PGOUSE: br {{.*}} !prof ![[SW1:[0-9]+]]
+  for (int i = 0, len = sizeof(weights) / sizeof(weights[0]); i < len; ++i) {
+    // PGOUSE: switch {{.*}} [
+    // PGOUSE: ], !prof ![[SW2:[0-9]+]]
+    switch (i[weights]) {
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 7
+    case 1:
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 8
+      // PGOUSE: br {{.*}} !prof ![[SW3:[0-9]+]]
+      if (i) {}
+      // fallthrough
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 9
+    case 2:
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 10
+      // PGOUSE: br {{.*}} !prof ![[SW4:[0-9]+]]
+      if (i) {}
+      break;
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 11
+    case 3:
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 12
+      // PGOUSE: br {{.*}} !prof ![[SW5:[0-9]+]]
+      if (i) {}
+      continue;
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 13
+    case 4:
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 14
+      // PGOUSE: br {{.*}} !prof ![[SW6:[0-9]+]]
+      if (i) {}
+      // PGOUSE: switch {{.*}} [
+      // PGOUSE: ], !prof ![[SW7:[0-9]+]]
+      switch (i) {
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 16
+      case 6 ... 9:
+        // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 17
+        // PGOUSE: br {{.*}} !prof ![[SW8:[0-9]+]]
+        if (i) {}
+        continue;
+      }
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 15
+
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 18
+    default:
+      // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 19
+      // PGOUSE: br {{.*}} !prof ![[SW9:[0-9]+]]
+      if (i == len - 1)
+        return;
+    }
+    // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 6
+  }
+
+  // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 20
+  // Never reached -> no weights
+  if (weights[0]) {}
+
+  // PGOGEN-NOT: store {{.*}} @[[SWC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @big_switch()
+// PGOUSE-LABEL: @big_switch()
+// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 0
+void big_switch() {
+  // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[BS1:[0-9]+]]
+  for (int i = 0; i < 32; ++i) {
+    // PGOUSE: switch {{.*}} [
+    // PGOUSE: ], !prof ![[BS2:[0-9]+]]
+    switch (1 << i) {
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 5
+    case (1 << 0):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 6
+      // PGOUSE: br {{.*}} !prof ![[BS3:[0-9]+]]
+      if (i) {}
+      // fallthrough
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 7
+    case (1 << 1):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 8
+      // PGOUSE: br {{.*}} !prof ![[BS4:[0-9]+]]
+      if (i) {}
+      break;
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 9
+    case (1 << 2) ... (1 << 12):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 10
+      // PGOUSE: br {{.*}} !prof ![[BS5:[0-9]+]]
+      if (i) {}
+      break;
+    // The branch for the large case range above appears after the case body
+    // PGOUSE: br {{.*}} !prof ![[BS6:[0-9]+]]
+
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 11
+    case (1 << 13):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 12
+      // PGOUSE: br {{.*}} !prof ![[BS7:[0-9]+]]
+      if (i) {}
+      break;
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 13
+    case (1 << 14) ... (1 << 28):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 14
+      // PGOUSE: br {{.*}} !prof ![[BS8:[0-9]+]]
+      if (i) {}
+      break;
+    // The branch for the large case range above appears after the case body
+    // PGOUSE: br {{.*}} !prof ![[BS9:[0-9]+]]
+
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 15
+    case (1 << 29) ... ((1 << 29) + 1):
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 16
+      // PGOUSE: br {{.*}} !prof ![[BS10:[0-9]+]]
+      if (i) {}
+      break;
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 17
+    default:
+      // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 18
+      // PGOUSE: br {{.*}} !prof ![[BS11:[0-9]+]]
+      if (i) {}
+      break;
+    }
+    // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 4
+  }
+
+  // PGOGEN-NOT: store {{.*}} @[[BSC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+  // PGOUSE: ret void
+}
+
+// PGOGEN-LABEL: @no_usable_data()
+// PGOUSE-LABEL: @no_usable_data()
+// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 0
+void no_usable_data() {
+  // The input data for PGOUSE is deliberately invalid for this function, so
+  // that we can test that we reject and ignore it properly.
+  int i = 0;
+
+  // PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 1
+  if (i) {}
+
+  // PGOGEN-NOT: store {{.*}} @[[NOC]],
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOUSE-DAG: ![[SL1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
+// PGOUSE-DAG: ![[SL2]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
+// PGOUSE-DAG: ![[SL3]] = metadata !{metadata !"branch_weights", i32 76, i32 2}
+
+// PGOUSE-DAG: ![[EE1]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[EE2]] = metadata !{metadata !"branch_weights", i32 52, i32 1}
+// PGOUSE-DAG: ![[EE3]] = metadata !{metadata !"branch_weights", i32 2, i32 51}
+// PGOUSE-DAG: ![[EE4]] = metadata !{metadata !"branch_weights", i32 26, i32 26}
+// PGOUSE-DAG: ![[EE5]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
+// PGOUSE-DAG: ![[EE6]] = metadata !{metadata !"branch_weights", i32 2, i32 26}
+// PGOUSE-DAG: ![[EE7]] = metadata !{metadata !"branch_weights", i32 26, i32 1}
+
+// PGOUSE-DAG: ![[IF1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
+// PGOUSE-DAG: ![[IF2]] = metadata !{metadata !"branch_weights", i32 51, i32 51}
+// PGOUSE-DAG: ![[IF3]] = metadata !{metadata !"branch_weights", i32 51, i32 1}
+// PGOUSE-DAG: ![[IF4]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
+// PGOUSE-DAG: ![[IF5]] = metadata !{metadata !"branch_weights", i32 34, i32 1}
+// PGOUSE-DAG: ![[IF6]] = metadata !{metadata !"branch_weights", i32 17, i32 2}
+// PGOUSE-DAG: ![[IF7]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
+// PGOUSE-DAG: ![[IF8]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
+
+// PGOUSE-DAG: ![[JM1]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
+// PGOUSE-DAG: ![[JM2]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[JM3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[JM4]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[JM5]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
+// PGOUSE-DAG: ![[JM6]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[JM7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2}
+// PGOUSE-DAG: ![[JM8]] = metadata !{metadata !"branch_weights", i32 11, i32 2}
+// PGOUSE-DAG: ![[JM9]] = metadata !{metadata !"branch_weights", i32 10, i32 2}
+
+// PGOUSE-DAG: ![[SW1]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
+// PGOUSE-DAG: ![[SW2]] = metadata !{metadata !"branch_weights", i32 6, i32 2, i32 3, i32 4, i32 5}
+// PGOUSE-DAG: ![[SW3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[SW4]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
+// PGOUSE-DAG: ![[SW5]] = metadata !{metadata !"branch_weights", i32 4, i32 1}
+// PGOUSE-DAG: ![[SW6]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
+// PGOUSE-DAG: ![[SW7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2, i32 2}
+// PGOUSE-DAG: ![[SW8]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
+// PGOUSE-DAG: ![[SW9]] = metadata !{metadata !"branch_weights", i32 2, i32 5}
+
+// PGOUSE-DAG: ![[BS1]] = metadata !{metadata !"branch_weights", i32 33, i32 2}
+// PGOUSE-DAG: ![[BS2]] = metadata !{metadata !"branch_weights", i32 29, i32 2, i32 2, i32 2, i32 2, i32 1}
+// PGOUSE-DAG: ![[BS3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
+// PGOUSE-DAG: ![[BS4]] = metadata !{metadata !"branch_weights", i32 2, i32 2}
+// PGOUSE-DAG: ![[BS5]] = metadata !{metadata !"branch_weights", i32 12, i32 1}
+// PGOUSE-DAG: ![[BS6]] = metadata !{metadata !"branch_weights", i32 12, i32 3}
+// PGOUSE-DAG: ![[BS7]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
+// PGOUSE-DAG: ![[BS8]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
+// PGOUSE-DAG: ![[BS9]] = metadata !{metadata !"branch_weights", i32 16, i32 14}
+// PGOUSE-DAG: ![[BS10]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
+// PGOUSE-DAG: ![[BS11]] = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+int main(int argc, const char *argv[]) {
+  simple_loops();
+  conditionals();
+  early_exits();
+  jumps();
+  switches();
+  big_switch();
+  no_usable_data();
+  return 0;
+}

Added: cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata?rev=198640&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata (added)
+++ cfe/trunk/test/CodeGenCXX/Inputs/instr-profile.pgodata Mon Jan  6 16:27:43 2014
@@ -0,0 +1,16 @@
+_Z6throwsv 11
+1
+100
+0
+0
+100
+66
+33
+17
+50
+33
+100
+
+main 1
+1
+

Added: cfe/trunk/test/CodeGenCXX/instr-profile.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/instr-profile.cpp?rev=198640&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/instr-profile.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/instr-profile.cpp Mon Jan  6 16:27:43 2014
@@ -0,0 +1,73 @@
+// Test that instrumentation based profiling feeds branch prediction
+// correctly. This tests both generation of profile data and use of the same,
+// and the input file for the -fprofile-instr-use case is expected to be result
+// of running the program generated by the -fprofile-instr-generate case. As
+// such, main() should call every function in this test.
+
+// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
+// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN-EXC %s
+
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE-EXC %s
+
+// PGOGEN: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
+// PGOGEN-EXC: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
+
+// PGOGEN-LABEL: @_Z6throwsv()
+// PGOUSE-LABEL: @_Z6throwsv()
+// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 0
+void throws() {
+  // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 1
+  // PGOUSE: br {{.*}} !prof ![[TH1:[0-9]+]]
+  for (int i = 0; i < 100; ++i) {
+    try {
+      // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 5
+      // PGOUSE: br {{.*}} !prof ![[TH2:[0-9]+]]
+      if (i % 3) {
+        // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 6
+        // PGOUSE: br {{.*}} !prof ![[TH3:[0-9]+]]
+        if (i < 50)
+          throw 1;
+      } else {
+        // The catch block may be emitted after the throw above, we can skip it
+        // by looking for an else block, but this will break if anyone puts an
+        // else in the catch
+        // PGOUSE: if.else{{.*}}:
+        // PGOGEN: if.else{{.*}}:
+
+        // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 7
+        // PGOUSE: br {{.*}} !prof ![[TH4:[0-9]+]]
+        if (i >= 50)
+          throw 0;
+      }
+    } catch (int e) {
+      // PGOUSE-EXC: catch{{.*}}:
+      // PGOGEN-EXC: catch{{.*}}:
+
+      // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 8
+      // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 9
+      // PGOUSE-EXC: br {{.*}} !prof ![[TH5:[0-9]+]]
+      if (e) {}
+    }
+    // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 4
+
+    // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 10
+    // PGOUSE: br {{.*}} !prof ![[TH6:[0-9]+]]
+    if (i < 100) {}
+  }
+
+  // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+  // PGOUSE: ret void
+}
+
+// PGOUSE-DAG: ![[TH1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
+// PGOUSE-DAG: ![[TH2]] = metadata !{metadata !"branch_weights", i32 67, i32 35}
+// PGOUSE-DAG: ![[TH3]] = metadata !{metadata !"branch_weights", i32 34, i32 34}
+// PGOUSE-DAG: ![[TH4]] = metadata !{metadata !"branch_weights", i32 18, i32 18}
+// PGOUSE-EXC: ![[TH5]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
+// PGOUSE-DAG: ![[TH6]] = metadata !{metadata !"branch_weights", i32 101, i32 1}
+
+int main(int argc, const char *argv[]) {
+  throws();
+  return 0;
+}





More information about the cfe-commits mailing list