[PATCH] D23727: [Profile] SelectInst instrumentation Support in IR-PGO

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 23 11:18:43 PDT 2016


David Li via llvm-commits <llvm-commits at lists.llvm.org> writes:
> Select instructions are currently ignored by PGO leading to missing
> profile data which is useful in guiding the select instruction
> lowering. This patch implements this missing feature for IRPGO.
>
> A new counter increment intrinsic is introduced (extended from
> existing increment) that also takes a step argument. It is used to
> track the number times the true value of a select instruction is
> taken. The FalseValue count is derived from TrueValue count and the
> parent BB's profile count.

Does this really merit a new intrinsic? Why not just teach the existing
intrinsic to take a step argument?

> https://reviews.llvm.org/D23727
 ...
> davidxl updated this revision to Diff 68884.
>
> Index: test/Transforms/PGOProfile/select1.ll
> ===================================================================
> --- test/Transforms/PGOProfile/select1.ll
> +++ test/Transforms/PGOProfile/select1.ll
> @@ -0,0 +1,34 @@
> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
> +; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata
> +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
> +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +define i32 @test_br_2(i32 %i) {
> +entry:
> +  %cmp = icmp sgt i32 %i, 0
> +  br i1 %cmp, label %if.then, label %if.else
> +
> +if.then:
> +  %add = add nsw i32 %i, 2
> +;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64
> +;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]])
> +;NOSELECT-NOT: call void @llvm.instrprof.increment.step
> +  %s = select i1 %cmp, i32 %add, i32 0
> +;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]]
> +;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3}
> +
> +  br label %if.end
> +
> +if.else:
> +  %sub = sub nsw i32 %i, 2
> +  br label %if.end
> +
> +if.end:
> +  %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
> +  ret i32 %retv
> +}
> Index: test/Transforms/PGOProfile/Inputs/select1.proftext
> ===================================================================
> --- test/Transforms/PGOProfile/Inputs/select1.proftext
> +++ test/Transforms/PGOProfile/Inputs/select1.proftext
> @@ -0,0 +1,9 @@
> +# :ir is the flag to indicate this is IR level profile.

I'm not sure this comment is really helping anybody.

> +:ir
> +test_br_2
> +72057623705475732
> +3
> +4
> +1
> +1
> +
> Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp
> ===================================================================
> --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp
> +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp
> @@ -86,6 +86,7 @@
>  #define DEBUG_TYPE "pgo-instrumentation"
>  
>  STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
> +STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
>  STATISTIC(NumOfPGOEdge, "Number of edges.");
>  STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
>  STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
> @@ -133,7 +134,56 @@
>  static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
>                                         cl::Hidden);
>  
> +// Command line option to enable/disable select instruction instrumentation.
> +static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
> +                                    cl::Hidden);
>  namespace {
> +
> +/// The select instruction visitor plays three roles specified
> +/// by the mode. In \c VM_counting mode, it simply counts the number of
> +/// select instructions. In \c VM_instrument mode, it inserts code to count
> +/// the number times TrueValue of select is taken. In \c VM_annotate mode,
> +/// it reads the profile data and annotate the select instruction with metadata.
> +enum VisitMode { VM_counting, VM_instrument, VM_annotate };
> +class PGOUseFunc;
> +
> +/// Instruction Visitor class to visit select instructions.
> +struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
> +  Function &F;
> +  unsigned NSIs = 0;            // Number of select instructions instrumented.
> +  VisitMode Mode = VM_counting; // Visiting mode.
> +  unsigned *CI = nullptr;       // pointer to current counter index.
> +  unsigned NC = 0;              // Number of counters
> +  GlobalVariable *FuncNameVar = nullptr;
> +  uint64_t FuncHash = 0;
> +  PGOUseFunc *UseFunc = nullptr;
> +
> +  SelectInstVisitor(Function &Func) : F(Func) {}
> +
> +  // Set the visitor in \c VM_instrument mode. The initial value
> +  // of \p *I will be first the counter index for select instructions.
> +  // \p *I will be updated after each select instruction visit.
> +  // \p C is the total number of counters in this function.
> +  void SetCounterIndex(unsigned *I, unsigned C, GlobalVariable *FN,
> +                       uint64_t FH) {
> +    Mode = VM_instrument;
> +    CI = I;
> +    NC = C;
> +    FuncHash = FH;
> +    FuncNameVar = FN;
> +  }
> +  // Set the visitor in \c VM_annotate mode. \p UF is the function
> +  // annotator, and \p I is the pointer to the counter index variable.
> +  void SetCounterUse(PGOUseFunc *UF, unsigned *I) {
> +    Mode = VM_annotate;
> +    UseFunc = UF;
> +    CI = I;
> +  }
> +  // Visit \p SI instruction and perform tasks according to visit mode.
> +  void visitSelectInst(SelectInst &SI);
> +  unsigned NumOfSelectInsts() const { return NSIs; }

Please name functions as verbs, and starting with a lower case
letter. Ie, getNumberOfSelectInsts or something like that.

> +};
> +
>  class PGOInstrumentationGenLegacyPass : public ModulePass {
>  public:
>    static char ID;
> @@ -180,6 +230,7 @@
>      AU.addRequired<BlockFrequencyInfoWrapperPass>();
>    }
>  };
> +
>  } // end anonymous namespace
>  
>  char PGOInstrumentationGenLegacyPass::ID = 0;
> @@ -254,6 +305,7 @@
>    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
>  
>  public:
> +  SelectInstVisitor SIVisitor;
>    std::string FuncName;
>    GlobalVariable *FuncNameVar;
>    // CFG hash value for this function.
> @@ -280,8 +332,14 @@
>        std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
>        bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
>        BlockFrequencyInfo *BFI = nullptr)
> -      : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
> +      : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
>          MST(F, BPI, BFI) {
> +
> +    // This should be done before CFG hash computation.
> +    assert(SIVisitor.Mode == VM_counting && "Wrong select visiting mode!");
> +    SIVisitor.visit(Func);
> +    NumOfPGOSelectInsts += SIVisitor.NumOfSelectInsts();
> +
>      FuncName = getPGOFuncName(F);
>      computeCFGHash();
>      if (ComdatMembers.size())
> @@ -308,7 +366,7 @@
>        if (!E->InMST && !E->Removed)
>          NumCounters++;
>      }
> -    return NumCounters;
> +    return NumCounters + SIVisitor.NumOfSelectInsts();
>    }
>  };
>  
> @@ -328,7 +386,8 @@
>      }
>    }
>    JC.update(Indexes);
> -  FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
> +  FunctionHash = (uint64_t)SIVisitor.NumOfSelectInsts() << 56 |
> +                 (uint64_t)findIndirectCallSites(F).size() << 48 |
>                   (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
>  }
>  
> @@ -473,6 +532,11 @@
>           Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
>           Builder.getInt32(I++)});
>    }
> +
> +  // Now instrument select instructions:
> +  FuncInfo.SIVisitor.SetCounterIndex(&I, NumCounters, FuncInfo.FuncNameVar,
> +                                     FuncInfo.FunctionHash);
> +  FuncInfo.SIVisitor.visit(F);

The statefulness of SIVisitor is kind of awkward - it isn't obvious to
me that SetCounterIndex would change the mode, and the assert earlier is
kind of unfortunate. Maybe instead of calling visit() in each mode we
should just have a different function for each operation? The three call
sites could then countSelects, instrumentSelects, and annotateSelects,
which saves a call to Set* and makes the comments redundant.

>    assert(I == NumCounters);
>  
>    if (DisableValueProfiling)
> @@ -594,17 +658,17 @@
>    // Return the profile record for this function;
>    InstrProfRecord &getProfileRecord() { return ProfileRecord; }
>  
> +  // Return the auxiliary BB information.
> +  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
> +    return FuncInfo.getBBInfo(BB);
> +  }
> +
>  private:
>    Function &F;
>    Module *M;
>    // This member stores the shared information with class PGOGenFunc.
>    FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
>  
> -  // Return the auxiliary BB information.
> -  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
> -    return FuncInfo.getBBInfo(BB);
> -  }
> -
>    // The maximum count value in the profile. This is only used in PGO use
>    // compilation.
>    uint64_t ProgramMaxCount;
> @@ -677,6 +741,9 @@
>      NewEdge1.InMST = true;
>      getBBInfo(InstrBB).setBBInfoCount(CountValue);
>    }
> +  // Now annotate select instructions
> +  FuncInfo.SIVisitor.SetCounterUse(this, &I);
> +  FuncInfo.SIVisitor.visit(F);
>    assert(I == CountFromProfile.size());
>  }
>  
> @@ -820,7 +887,7 @@
>    DEBUG(FuncInfo.dumpInfo("after reading profile."));
>  }
>  
> -static void setProfMetadata(Module *M, TerminatorInst *TI,
> +static void setProfMetadata(Module *M, Instruction *TI,
>                              ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
>    MDBuilder MDB(M->getContext());
>    assert(MaxCount > 0 && "Bad max count");
> @@ -869,6 +936,45 @@
>    }
>  }
>  
> +void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
> +  if (!PGOInstrSelect)
> +    return;
> +  // FIXME: do not handle this yet.
> +  if (SI.getCondition()->getType()->isVectorTy())
> +    return;
> +
> +  NSIs++;
> +  if (Mode == VM_counting)
> +    return;
> +  else if (Mode == VM_instrument) {
> +    Module *M = F.getParent();
> +    IRBuilder<> Builder(&SI);
> +    Type *Int64Ty = Builder.getInt64Ty();
> +    Type *I8PtrTy = Builder.getInt8PtrTy();
> +    auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
> +    Builder.CreateCall(
> +        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
> +        {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
> +         Builder.getInt64(FuncHash), Builder.getInt32(NC),
> +         Builder.getInt32(*CI), Step});
> +    ++(*CI);
> +  } else {
> +    assert(Mode == VM_annotate && "Unknown visiting mode");
> +    std::vector<uint64_t> &CountFromProfile =
> +        UseFunc->getProfileRecord().Counts;
> +    assert(*CI < CountFromProfile.size() && "Out of bound access of counters");
> +    uint64_t SCounts[2];
> +    SCounts[0] = CountFromProfile[*CI]; // True count
> +    ++(*CI);
> +    uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue;
> +    // False Count
> +    SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
> +    uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
> +
> +    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
> +  }

I prefer the three functions, but for future reference a switch
statement (with no default label) would be better here - that way we'd
get a -Wcovered-switch if we added another mode and forgot to update a
spot.

> +}
> +
>  // Traverse all the indirect callsites and annotate the instructions.
>  void PGOUseFunc::annotateIndirectCallSites() {
>    if (DisableValueProfiling)
> Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
> ===================================================================
> --- lib/Transforms/Instrumentation/InstrProfiling.cpp
> +++ lib/Transforms/Instrumentation/InstrProfiling.cpp
> @@ -107,6 +107,13 @@
>    return getInstrProfCoverageSectionName(isMachO());
>  }
>  
> +static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
> +  InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
> +  if (Inc)
> +    return Inc;
> +  return dyn_cast<InstrProfIncrementInst>(Instr);
> +}
> +
>  bool InstrProfiling::run(Module &M) {
>    bool MadeChange = false;
>  
> @@ -138,7 +145,8 @@
>      for (BasicBlock &BB : F)
>        for (auto I = BB.begin(), E = BB.end(); I != E;) {
>          auto Instr = I++;
> -        if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
> +        InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
> +        if (Inc) {
>            lowerIncrement(Inc);
>            MadeChange = true;
>          } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
> @@ -214,14 +222,22 @@
>    Ind->eraseFromParent();
>  }
>  
> +static Value *getIncrementStep(InstrProfIncrementInst *Inc,
> +                               IRBuilder<> &Builder) {
> +  auto *IncWithStep = dyn_cast<InstrProfIncrementInstStep>(Inc);
> +  if (IncWithStep)
> +    return IncWithStep->getStep();
> +  return Builder.getInt64(1);
> +}
> +
>  void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
>    GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
>  
>    IRBuilder<> Builder(Inc);
>    uint64_t Index = Inc->getIndex()->getZExtValue();
>    Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
>    Value *Count = Builder.CreateLoad(Addr, "pgocount");
> -  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
> +  Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder));
>    Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
>    Inc->eraseFromParent();
>  }
> Index: include/llvm/IR/Intrinsics.td
> ===================================================================
> --- include/llvm/IR/Intrinsics.td
> +++ include/llvm/IR/Intrinsics.td
> @@ -346,6 +346,12 @@
>                                           llvm_i32_ty, llvm_i32_ty],
>                                          []>;
>  
> +// A counter increment with step for instrumentation based profiling.
> +def int_instrprof_increment_step : Intrinsic<[],
> +                                        [llvm_ptr_ty, llvm_i64_ty,
> +                                         llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
> +                                        []>;
> +
>  // A call to profile runtime for value profiling of target expressions
>  // through instrumentation based profiling.
>  def int_instrprof_value_profile : Intrinsic<[],
> Index: include/llvm/IR/IntrinsicInst.h
> ===================================================================
> --- include/llvm/IR/IntrinsicInst.h
> +++ include/llvm/IR/IntrinsicInst.h
> @@ -361,6 +361,17 @@
>      }
>    };
>  
> +  class InstrProfIncrementInstStep : public InstrProfIncrementInst {
> +  public:
> +    static inline bool classof(const IntrinsicInst *I) {
> +      return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
> +    }
> +    static inline bool classof(const Value *V) {
> +      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
> +    }
> +    Value *getStep() const { return const_cast<Value *>(getArgOperand(4)); }
> +  };
> +
>    /// This represents the llvm.instrprof_value_profile intrinsic.
>    class InstrProfValueProfileInst : public IntrinsicInst {
>    public:
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list