[PATCH] D23727: [Profile] SelectInst instrumentation Support in IR-PGO
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 30 21:00:00 PDT 2016
On Tue, Aug 23, 2016 at 11:18 AM, Justin Bogner <mail at justinbogner.com> wrote:
> David Li via llvm-commits <llvm-commits at lists.llvm.org> writes:
>> Select instructions are currently ignored by PGO leading to missing
>> profile data which is useful in guiding the select instruction
>> lowering. This patch implements this missing feature for IRPGO.
>>
>> A new counter increment intrinsic is introduced (extended from
>> existing increment) that also takes a step argument. It is used to
>> track the number times the true value of a select instruction is
>> taken. The FalseValue count is derived from TrueValue count and the
>> parent BB's profile count.
>
> Does this really merit a new intrinsic? Why not just teach the existing
> intrinsic to take a step argument?
If you think this is the right thing to do, I can have a follow up
patch to deprecate the old intrinsic.
I have posted the updated patch with your comments addressed.
Dvaid
>
>> https://reviews.llvm.org/D23727
> ...
>> davidxl updated this revision to Diff 68884.
>>
>> Index: test/Transforms/PGOProfile/select1.ll
>> ===================================================================
>> --- test/Transforms/PGOProfile/select1.ll
>> +++ test/Transforms/PGOProfile/select1.ll
>> @@ -0,0 +1,34 @@
>> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
>> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
>> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
>> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
>> +; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata
>> +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
>> +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
>> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> +target triple = "x86_64-unknown-linux-gnu"
>> +
>> +define i32 @test_br_2(i32 %i) {
>> +entry:
>> + %cmp = icmp sgt i32 %i, 0
>> + br i1 %cmp, label %if.then, label %if.else
>> +
>> +if.then:
>> + %add = add nsw i32 %i, 2
>> +;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64
>> +;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]])
>> +;NOSELECT-NOT: call void @llvm.instrprof.increment.step
>> + %s = select i1 %cmp, i32 %add, i32 0
>> +;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]]
>> +;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3}
>> +
>> + br label %if.end
>> +
>> +if.else:
>> + %sub = sub nsw i32 %i, 2
>> + br label %if.end
>> +
>> +if.end:
>> + %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
>> + ret i32 %retv
>> +}
>> Index: test/Transforms/PGOProfile/Inputs/select1.proftext
>> ===================================================================
>> --- test/Transforms/PGOProfile/Inputs/select1.proftext
>> +++ test/Transforms/PGOProfile/Inputs/select1.proftext
>> @@ -0,0 +1,9 @@
>> +# :ir is the flag to indicate this is IR level profile.
>
> I'm not sure this comment is really helping anybody.
>
>> +:ir
>> +test_br_2
>> +72057623705475732
>> +3
>> +4
>> +1
>> +1
>> +
>> Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> ===================================================================
>> --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> @@ -86,6 +86,7 @@
>> #define DEBUG_TYPE "pgo-instrumentation"
>>
>> STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
>> +STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
>> STATISTIC(NumOfPGOEdge, "Number of edges.");
>> STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
>> STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
>> @@ -133,7 +134,56 @@
>> static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
>> cl::Hidden);
>>
>> +// Command line option to enable/disable select instruction instrumentation.
>> +static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
>> + cl::Hidden);
>> namespace {
>> +
>> +/// The select instruction visitor plays three roles specified
>> +/// by the mode. In \c VM_counting mode, it simply counts the number of
>> +/// select instructions. In \c VM_instrument mode, it inserts code to count
>> +/// the number times TrueValue of select is taken. In \c VM_annotate mode,
>> +/// it reads the profile data and annotate the select instruction with metadata.
>> +enum VisitMode { VM_counting, VM_instrument, VM_annotate };
>> +class PGOUseFunc;
>> +
>> +/// Instruction Visitor class to visit select instructions.
>> +struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
>> + Function &F;
>> + unsigned NSIs = 0; // Number of select instructions instrumented.
>> + VisitMode Mode = VM_counting; // Visiting mode.
>> + unsigned *CI = nullptr; // pointer to current counter index.
>> + unsigned NC = 0; // Number of counters
>> + GlobalVariable *FuncNameVar = nullptr;
>> + uint64_t FuncHash = 0;
>> + PGOUseFunc *UseFunc = nullptr;
>> +
>> + SelectInstVisitor(Function &Func) : F(Func) {}
>> +
>> + // Set the visitor in \c VM_instrument mode. The initial value
>> + // of \p *I will be first the counter index for select instructions.
>> + // \p *I will be updated after each select instruction visit.
>> + // \p C is the total number of counters in this function.
>> + void SetCounterIndex(unsigned *I, unsigned C, GlobalVariable *FN,
>> + uint64_t FH) {
>> + Mode = VM_instrument;
>> + CI = I;
>> + NC = C;
>> + FuncHash = FH;
>> + FuncNameVar = FN;
>> + }
>> + // Set the visitor in \c VM_annotate mode. \p UF is the function
>> + // annotator, and \p I is the pointer to the counter index variable.
>> + void SetCounterUse(PGOUseFunc *UF, unsigned *I) {
>> + Mode = VM_annotate;
>> + UseFunc = UF;
>> + CI = I;
>> + }
>> + // Visit \p SI instruction and perform tasks according to visit mode.
>> + void visitSelectInst(SelectInst &SI);
>> + unsigned NumOfSelectInsts() const { return NSIs; }
>
> Please name functions as verbs, and starting with a lower case
> letter. Ie, getNumberOfSelectInsts or something like that.
>
>> +};
>> +
>> class PGOInstrumentationGenLegacyPass : public ModulePass {
>> public:
>> static char ID;
>> @@ -180,6 +230,7 @@
>> AU.addRequired<BlockFrequencyInfoWrapperPass>();
>> }
>> };
>> +
>> } // end anonymous namespace
>>
>> char PGOInstrumentationGenLegacyPass::ID = 0;
>> @@ -254,6 +305,7 @@
>> std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
>>
>> public:
>> + SelectInstVisitor SIVisitor;
>> std::string FuncName;
>> GlobalVariable *FuncNameVar;
>> // CFG hash value for this function.
>> @@ -280,8 +332,14 @@
>> std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
>> bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
>> BlockFrequencyInfo *BFI = nullptr)
>> - : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
>> + : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
>> MST(F, BPI, BFI) {
>> +
>> + // This should be done before CFG hash computation.
>> + assert(SIVisitor.Mode == VM_counting && "Wrong select visiting mode!");
>> + SIVisitor.visit(Func);
>> + NumOfPGOSelectInsts += SIVisitor.NumOfSelectInsts();
>> +
>> FuncName = getPGOFuncName(F);
>> computeCFGHash();
>> if (ComdatMembers.size())
>> @@ -308,7 +366,7 @@
>> if (!E->InMST && !E->Removed)
>> NumCounters++;
>> }
>> - return NumCounters;
>> + return NumCounters + SIVisitor.NumOfSelectInsts();
>> }
>> };
>>
>> @@ -328,7 +386,8 @@
>> }
>> }
>> JC.update(Indexes);
>> - FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
>> + FunctionHash = (uint64_t)SIVisitor.NumOfSelectInsts() << 56 |
>> + (uint64_t)findIndirectCallSites(F).size() << 48 |
>> (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
>> }
>>
>> @@ -473,6 +532,11 @@
>> Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
>> Builder.getInt32(I++)});
>> }
>> +
>> + // Now instrument select instructions:
>> + FuncInfo.SIVisitor.SetCounterIndex(&I, NumCounters, FuncInfo.FuncNameVar,
>> + FuncInfo.FunctionHash);
>> + FuncInfo.SIVisitor.visit(F);
>
> The statefulness of SIVisitor is kind of awkward - it isn't obvious to
> me that SetCounterIndex would change the mode, and the assert earlier is
> kind of unfortunate. Maybe instead of calling visit() in each mode we
> should just have a different function for each operation? The three call
> sites could then countSelects, instrumentSelects, and annotateSelects,
> which saves a call to Set* and makes the comments redundant.
>
>> assert(I == NumCounters);
>>
>> if (DisableValueProfiling)
>> @@ -594,17 +658,17 @@
>> // Return the profile record for this function;
>> InstrProfRecord &getProfileRecord() { return ProfileRecord; }
>>
>> + // Return the auxiliary BB information.
>> + UseBBInfo &getBBInfo(const BasicBlock *BB) const {
>> + return FuncInfo.getBBInfo(BB);
>> + }
>> +
>> private:
>> Function &F;
>> Module *M;
>> // This member stores the shared information with class PGOGenFunc.
>> FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
>>
>> - // Return the auxiliary BB information.
>> - UseBBInfo &getBBInfo(const BasicBlock *BB) const {
>> - return FuncInfo.getBBInfo(BB);
>> - }
>> -
>> // The maximum count value in the profile. This is only used in PGO use
>> // compilation.
>> uint64_t ProgramMaxCount;
>> @@ -677,6 +741,9 @@
>> NewEdge1.InMST = true;
>> getBBInfo(InstrBB).setBBInfoCount(CountValue);
>> }
>> + // Now annotate select instructions
>> + FuncInfo.SIVisitor.SetCounterUse(this, &I);
>> + FuncInfo.SIVisitor.visit(F);
>> assert(I == CountFromProfile.size());
>> }
>>
>> @@ -820,7 +887,7 @@
>> DEBUG(FuncInfo.dumpInfo("after reading profile."));
>> }
>>
>> -static void setProfMetadata(Module *M, TerminatorInst *TI,
>> +static void setProfMetadata(Module *M, Instruction *TI,
>> ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
>> MDBuilder MDB(M->getContext());
>> assert(MaxCount > 0 && "Bad max count");
>> @@ -869,6 +936,45 @@
>> }
>> }
>>
>> +void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
>> + if (!PGOInstrSelect)
>> + return;
>> + // FIXME: do not handle this yet.
>> + if (SI.getCondition()->getType()->isVectorTy())
>> + return;
>> +
>> + NSIs++;
>> + if (Mode == VM_counting)
>> + return;
>> + else if (Mode == VM_instrument) {
>> + Module *M = F.getParent();
>> + IRBuilder<> Builder(&SI);
>> + Type *Int64Ty = Builder.getInt64Ty();
>> + Type *I8PtrTy = Builder.getInt8PtrTy();
>> + auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
>> + Builder.CreateCall(
>> + Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
>> + {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
>> + Builder.getInt64(FuncHash), Builder.getInt32(NC),
>> + Builder.getInt32(*CI), Step});
>> + ++(*CI);
>> + } else {
>> + assert(Mode == VM_annotate && "Unknown visiting mode");
>> + std::vector<uint64_t> &CountFromProfile =
>> + UseFunc->getProfileRecord().Counts;
>> + assert(*CI < CountFromProfile.size() && "Out of bound access of counters");
>> + uint64_t SCounts[2];
>> + SCounts[0] = CountFromProfile[*CI]; // True count
>> + ++(*CI);
>> + uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue;
>> + // False Count
>> + SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
>> + uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
>> +
>> + setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
>> + }
>
> I prefer the three functions, but for future reference a switch
> statement (with no default label) would be better here - that way we'd
> get a -Wcovered-switch if we added another mode and forgot to update a
> spot.
>
>> +}
>> +
>> // Traverse all the indirect callsites and annotate the instructions.
>> void PGOUseFunc::annotateIndirectCallSites() {
>> if (DisableValueProfiling)
>> Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
>> ===================================================================
>> --- lib/Transforms/Instrumentation/InstrProfiling.cpp
>> +++ lib/Transforms/Instrumentation/InstrProfiling.cpp
>> @@ -107,6 +107,13 @@
>> return getInstrProfCoverageSectionName(isMachO());
>> }
>>
>> +static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
>> + InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
>> + if (Inc)
>> + return Inc;
>> + return dyn_cast<InstrProfIncrementInst>(Instr);
>> +}
>> +
>> bool InstrProfiling::run(Module &M) {
>> bool MadeChange = false;
>>
>> @@ -138,7 +145,8 @@
>> for (BasicBlock &BB : F)
>> for (auto I = BB.begin(), E = BB.end(); I != E;) {
>> auto Instr = I++;
>> - if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
>> + InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
>> + if (Inc) {
>> lowerIncrement(Inc);
>> MadeChange = true;
>> } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
>> @@ -214,14 +222,22 @@
>> Ind->eraseFromParent();
>> }
>>
>> +static Value *getIncrementStep(InstrProfIncrementInst *Inc,
>> + IRBuilder<> &Builder) {
>> + auto *IncWithStep = dyn_cast<InstrProfIncrementInstStep>(Inc);
>> + if (IncWithStep)
>> + return IncWithStep->getStep();
>> + return Builder.getInt64(1);
>> +}
>> +
>> void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
>> GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
>>
>> IRBuilder<> Builder(Inc);
>> uint64_t Index = Inc->getIndex()->getZExtValue();
>> Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
>> Value *Count = Builder.CreateLoad(Addr, "pgocount");
>> - Count = Builder.CreateAdd(Count, Builder.getInt64(1));
>> + Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder));
>> Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
>> Inc->eraseFromParent();
>> }
>> Index: include/llvm/IR/Intrinsics.td
>> ===================================================================
>> --- include/llvm/IR/Intrinsics.td
>> +++ include/llvm/IR/Intrinsics.td
>> @@ -346,6 +346,12 @@
>> llvm_i32_ty, llvm_i32_ty],
>> []>;
>>
>> +// A counter increment with step for instrumentation based profiling.
>> +def int_instrprof_increment_step : Intrinsic<[],
>> + [llvm_ptr_ty, llvm_i64_ty,
>> + llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
>> + []>;
>> +
>> // A call to profile runtime for value profiling of target expressions
>> // through instrumentation based profiling.
>> def int_instrprof_value_profile : Intrinsic<[],
>> Index: include/llvm/IR/IntrinsicInst.h
>> ===================================================================
>> --- include/llvm/IR/IntrinsicInst.h
>> +++ include/llvm/IR/IntrinsicInst.h
>> @@ -361,6 +361,17 @@
>> }
>> };
>>
>> + class InstrProfIncrementInstStep : public InstrProfIncrementInst {
>> + public:
>> + static inline bool classof(const IntrinsicInst *I) {
>> + return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
>> + }
>> + static inline bool classof(const Value *V) {
>> + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
>> + }
>> + Value *getStep() const { return const_cast<Value *>(getArgOperand(4)); }
>> + };
>> +
>> /// This represents the llvm.instrprof_value_profile intrinsic.
>> class InstrProfValueProfileInst : public IntrinsicInst {
>> public:
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list