[PATCH] D23727: [Profile] SelectInst instrumentation Support in IR-PGO

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 30 21:00:00 PDT 2016


On Tue, Aug 23, 2016 at 11:18 AM, Justin Bogner <mail at justinbogner.com> wrote:
> David Li via llvm-commits <llvm-commits at lists.llvm.org> writes:
>> Select instructions are currently ignored by PGO leading to missing
>> profile data which is useful in guiding the select instruction
>> lowering. This patch implements this missing feature for IRPGO.
>>
>> A new counter increment intrinsic is introduced (extended from
>> existing increment) that also takes a step argument. It is used to
>> track the number times the true value of a select instruction is
>> taken. The FalseValue count is derived from TrueValue count and the
>> parent BB's profile count.
>
> Does this really merit a new intrinsic? Why not just teach the existing
> intrinsic to take a step argument?

If you think this is the right thing to do, I can have a follow up
patch to deprecate the old intrinsic.

I have posted the updated patch with your comments addressed.

Dvaid

>
>> https://reviews.llvm.org/D23727
>  ...
>> davidxl updated this revision to Diff 68884.
>>
>> Index: test/Transforms/PGOProfile/select1.ll
>> ===================================================================
>> --- test/Transforms/PGOProfile/select1.ll
>> +++ test/Transforms/PGOProfile/select1.ll
>> @@ -0,0 +1,34 @@
>> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
>> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
>> +; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
>> +; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
>> +; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata
>> +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
>> +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
>> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> +target triple = "x86_64-unknown-linux-gnu"
>> +
>> +define i32 @test_br_2(i32 %i) {
>> +entry:
>> +  %cmp = icmp sgt i32 %i, 0
>> +  br i1 %cmp, label %if.then, label %if.else
>> +
>> +if.then:
>> +  %add = add nsw i32 %i, 2
>> +;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64
>> +;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]])
>> +;NOSELECT-NOT: call void @llvm.instrprof.increment.step
>> +  %s = select i1 %cmp, i32 %add, i32 0
>> +;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]]
>> +;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3}
>> +
>> +  br label %if.end
>> +
>> +if.else:
>> +  %sub = sub nsw i32 %i, 2
>> +  br label %if.end
>> +
>> +if.end:
>> +  %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
>> +  ret i32 %retv
>> +}
>> Index: test/Transforms/PGOProfile/Inputs/select1.proftext
>> ===================================================================
>> --- test/Transforms/PGOProfile/Inputs/select1.proftext
>> +++ test/Transforms/PGOProfile/Inputs/select1.proftext
>> @@ -0,0 +1,9 @@
>> +# :ir is the flag to indicate this is IR level profile.
>
> I'm not sure this comment is really helping anybody.
>
>> +:ir
>> +test_br_2
>> +72057623705475732
>> +3
>> +4
>> +1
>> +1
>> +
>> Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> ===================================================================
>> --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp
>> @@ -86,6 +86,7 @@
>>  #define DEBUG_TYPE "pgo-instrumentation"
>>
>>  STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
>> +STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
>>  STATISTIC(NumOfPGOEdge, "Number of edges.");
>>  STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
>>  STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
>> @@ -133,7 +134,56 @@
>>  static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
>>                                         cl::Hidden);
>>
>> +// Command line option to enable/disable select instruction instrumentation.
>> +static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
>> +                                    cl::Hidden);
>>  namespace {
>> +
>> +/// The select instruction visitor plays three roles specified
>> +/// by the mode. In \c VM_counting mode, it simply counts the number of
>> +/// select instructions. In \c VM_instrument mode, it inserts code to count
>> +/// the number times TrueValue of select is taken. In \c VM_annotate mode,
>> +/// it reads the profile data and annotate the select instruction with metadata.
>> +enum VisitMode { VM_counting, VM_instrument, VM_annotate };
>> +class PGOUseFunc;
>> +
>> +/// Instruction Visitor class to visit select instructions.
>> +struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
>> +  Function &F;
>> +  unsigned NSIs = 0;            // Number of select instructions instrumented.
>> +  VisitMode Mode = VM_counting; // Visiting mode.
>> +  unsigned *CI = nullptr;       // pointer to current counter index.
>> +  unsigned NC = 0;              // Number of counters
>> +  GlobalVariable *FuncNameVar = nullptr;
>> +  uint64_t FuncHash = 0;
>> +  PGOUseFunc *UseFunc = nullptr;
>> +
>> +  SelectInstVisitor(Function &Func) : F(Func) {}
>> +
>> +  // Set the visitor in \c VM_instrument mode. The initial value
>> +  // of \p *I will be first the counter index for select instructions.
>> +  // \p *I will be updated after each select instruction visit.
>> +  // \p C is the total number of counters in this function.
>> +  void SetCounterIndex(unsigned *I, unsigned C, GlobalVariable *FN,
>> +                       uint64_t FH) {
>> +    Mode = VM_instrument;
>> +    CI = I;
>> +    NC = C;
>> +    FuncHash = FH;
>> +    FuncNameVar = FN;
>> +  }
>> +  // Set the visitor in \c VM_annotate mode. \p UF is the function
>> +  // annotator, and \p I is the pointer to the counter index variable.
>> +  void SetCounterUse(PGOUseFunc *UF, unsigned *I) {
>> +    Mode = VM_annotate;
>> +    UseFunc = UF;
>> +    CI = I;
>> +  }
>> +  // Visit \p SI instruction and perform tasks according to visit mode.
>> +  void visitSelectInst(SelectInst &SI);
>> +  unsigned NumOfSelectInsts() const { return NSIs; }
>
> Please name functions as verbs, and starting with a lower case
> letter. Ie, getNumberOfSelectInsts or something like that.
>
>> +};
>> +
>>  class PGOInstrumentationGenLegacyPass : public ModulePass {
>>  public:
>>    static char ID;
>> @@ -180,6 +230,7 @@
>>      AU.addRequired<BlockFrequencyInfoWrapperPass>();
>>    }
>>  };
>> +
>>  } // end anonymous namespace
>>
>>  char PGOInstrumentationGenLegacyPass::ID = 0;
>> @@ -254,6 +305,7 @@
>>    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
>>
>>  public:
>> +  SelectInstVisitor SIVisitor;
>>    std::string FuncName;
>>    GlobalVariable *FuncNameVar;
>>    // CFG hash value for this function.
>> @@ -280,8 +332,14 @@
>>        std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
>>        bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
>>        BlockFrequencyInfo *BFI = nullptr)
>> -      : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
>> +      : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
>>          MST(F, BPI, BFI) {
>> +
>> +    // This should be done before CFG hash computation.
>> +    assert(SIVisitor.Mode == VM_counting && "Wrong select visiting mode!");
>> +    SIVisitor.visit(Func);
>> +    NumOfPGOSelectInsts += SIVisitor.NumOfSelectInsts();
>> +
>>      FuncName = getPGOFuncName(F);
>>      computeCFGHash();
>>      if (ComdatMembers.size())
>> @@ -308,7 +366,7 @@
>>        if (!E->InMST && !E->Removed)
>>          NumCounters++;
>>      }
>> -    return NumCounters;
>> +    return NumCounters + SIVisitor.NumOfSelectInsts();
>>    }
>>  };
>>
>> @@ -328,7 +386,8 @@
>>      }
>>    }
>>    JC.update(Indexes);
>> -  FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
>> +  FunctionHash = (uint64_t)SIVisitor.NumOfSelectInsts() << 56 |
>> +                 (uint64_t)findIndirectCallSites(F).size() << 48 |
>>                   (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
>>  }
>>
>> @@ -473,6 +532,11 @@
>>           Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
>>           Builder.getInt32(I++)});
>>    }
>> +
>> +  // Now instrument select instructions:
>> +  FuncInfo.SIVisitor.SetCounterIndex(&I, NumCounters, FuncInfo.FuncNameVar,
>> +                                     FuncInfo.FunctionHash);
>> +  FuncInfo.SIVisitor.visit(F);
>
> The statefulness of SIVisitor is kind of awkward - it isn't obvious to
> me that SetCounterIndex would change the mode, and the assert earlier is
> kind of unfortunate. Maybe instead of calling visit() in each mode we
> should just have a different function for each operation? The three call
> sites could then countSelects, instrumentSelects, and annotateSelects,
> which saves a call to Set* and makes the comments redundant.
>
>>    assert(I == NumCounters);
>>
>>    if (DisableValueProfiling)
>> @@ -594,17 +658,17 @@
>>    // Return the profile record for this function;
>>    InstrProfRecord &getProfileRecord() { return ProfileRecord; }
>>
>> +  // Return the auxiliary BB information.
>> +  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
>> +    return FuncInfo.getBBInfo(BB);
>> +  }
>> +
>>  private:
>>    Function &F;
>>    Module *M;
>>    // This member stores the shared information with class PGOGenFunc.
>>    FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
>>
>> -  // Return the auxiliary BB information.
>> -  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
>> -    return FuncInfo.getBBInfo(BB);
>> -  }
>> -
>>    // The maximum count value in the profile. This is only used in PGO use
>>    // compilation.
>>    uint64_t ProgramMaxCount;
>> @@ -677,6 +741,9 @@
>>      NewEdge1.InMST = true;
>>      getBBInfo(InstrBB).setBBInfoCount(CountValue);
>>    }
>> +  // Now annotate select instructions
>> +  FuncInfo.SIVisitor.SetCounterUse(this, &I);
>> +  FuncInfo.SIVisitor.visit(F);
>>    assert(I == CountFromProfile.size());
>>  }
>>
>> @@ -820,7 +887,7 @@
>>    DEBUG(FuncInfo.dumpInfo("after reading profile."));
>>  }
>>
>> -static void setProfMetadata(Module *M, TerminatorInst *TI,
>> +static void setProfMetadata(Module *M, Instruction *TI,
>>                              ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
>>    MDBuilder MDB(M->getContext());
>>    assert(MaxCount > 0 && "Bad max count");
>> @@ -869,6 +936,45 @@
>>    }
>>  }
>>
>> +void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
>> +  if (!PGOInstrSelect)
>> +    return;
>> +  // FIXME: do not handle this yet.
>> +  if (SI.getCondition()->getType()->isVectorTy())
>> +    return;
>> +
>> +  NSIs++;
>> +  if (Mode == VM_counting)
>> +    return;
>> +  else if (Mode == VM_instrument) {
>> +    Module *M = F.getParent();
>> +    IRBuilder<> Builder(&SI);
>> +    Type *Int64Ty = Builder.getInt64Ty();
>> +    Type *I8PtrTy = Builder.getInt8PtrTy();
>> +    auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
>> +    Builder.CreateCall(
>> +        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
>> +        {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
>> +         Builder.getInt64(FuncHash), Builder.getInt32(NC),
>> +         Builder.getInt32(*CI), Step});
>> +    ++(*CI);
>> +  } else {
>> +    assert(Mode == VM_annotate && "Unknown visiting mode");
>> +    std::vector<uint64_t> &CountFromProfile =
>> +        UseFunc->getProfileRecord().Counts;
>> +    assert(*CI < CountFromProfile.size() && "Out of bound access of counters");
>> +    uint64_t SCounts[2];
>> +    SCounts[0] = CountFromProfile[*CI]; // True count
>> +    ++(*CI);
>> +    uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue;
>> +    // False Count
>> +    SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
>> +    uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
>> +
>> +    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
>> +  }
>
> I prefer the three functions, but for future reference a switch
> statement (with no default label) would be better here - that way we'd
> get a -Wcovered-switch if we added another mode and forgot to update a
> spot.
>
>> +}
>> +
>>  // Traverse all the indirect callsites and annotate the instructions.
>>  void PGOUseFunc::annotateIndirectCallSites() {
>>    if (DisableValueProfiling)
>> Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
>> ===================================================================
>> --- lib/Transforms/Instrumentation/InstrProfiling.cpp
>> +++ lib/Transforms/Instrumentation/InstrProfiling.cpp
>> @@ -107,6 +107,13 @@
>>    return getInstrProfCoverageSectionName(isMachO());
>>  }
>>
>> +static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
>> +  InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
>> +  if (Inc)
>> +    return Inc;
>> +  return dyn_cast<InstrProfIncrementInst>(Instr);
>> +}
>> +
>>  bool InstrProfiling::run(Module &M) {
>>    bool MadeChange = false;
>>
>> @@ -138,7 +145,8 @@
>>      for (BasicBlock &BB : F)
>>        for (auto I = BB.begin(), E = BB.end(); I != E;) {
>>          auto Instr = I++;
>> -        if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
>> +        InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
>> +        if (Inc) {
>>            lowerIncrement(Inc);
>>            MadeChange = true;
>>          } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
>> @@ -214,14 +222,22 @@
>>    Ind->eraseFromParent();
>>  }
>>
>> +static Value *getIncrementStep(InstrProfIncrementInst *Inc,
>> +                               IRBuilder<> &Builder) {
>> +  auto *IncWithStep = dyn_cast<InstrProfIncrementInstStep>(Inc);
>> +  if (IncWithStep)
>> +    return IncWithStep->getStep();
>> +  return Builder.getInt64(1);
>> +}
>> +
>>  void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
>>    GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
>>
>>    IRBuilder<> Builder(Inc);
>>    uint64_t Index = Inc->getIndex()->getZExtValue();
>>    Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
>>    Value *Count = Builder.CreateLoad(Addr, "pgocount");
>> -  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
>> +  Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder));
>>    Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
>>    Inc->eraseFromParent();
>>  }
>> Index: include/llvm/IR/Intrinsics.td
>> ===================================================================
>> --- include/llvm/IR/Intrinsics.td
>> +++ include/llvm/IR/Intrinsics.td
>> @@ -346,6 +346,12 @@
>>                                           llvm_i32_ty, llvm_i32_ty],
>>                                          []>;
>>
>> +// A counter increment with step for instrumentation based profiling.
>> +def int_instrprof_increment_step : Intrinsic<[],
>> +                                        [llvm_ptr_ty, llvm_i64_ty,
>> +                                         llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
>> +                                        []>;
>> +
>>  // A call to profile runtime for value profiling of target expressions
>>  // through instrumentation based profiling.
>>  def int_instrprof_value_profile : Intrinsic<[],
>> Index: include/llvm/IR/IntrinsicInst.h
>> ===================================================================
>> --- include/llvm/IR/IntrinsicInst.h
>> +++ include/llvm/IR/IntrinsicInst.h
>> @@ -361,6 +361,17 @@
>>      }
>>    };
>>
>> +  class InstrProfIncrementInstStep : public InstrProfIncrementInst {
>> +  public:
>> +    static inline bool classof(const IntrinsicInst *I) {
>> +      return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
>> +    }
>> +    static inline bool classof(const Value *V) {
>> +      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
>> +    }
>> +    Value *getStep() const { return const_cast<Value *>(getArgOperand(4)); }
>> +  };
>> +
>>    /// This represents the llvm.instrprof_value_profile intrinsic.
>>    class InstrProfValueProfileInst : public IntrinsicInst {
>>    public:
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list