[PATCH] Clang changes for indirect call target profiling

Justin Bogner mail at justinbogner.com
Thu May 21 16:10:15 PDT 2015


Betul Buyukkurt <betulb at codeaurora.org> writes:
> Updated CL to reflect recent IRBuilder related changes.
>
>
> http://reviews.llvm.org/D8940
>
> Files:
>   lib/CodeGen/CGCall.cpp
>   lib/CodeGen/CGClass.cpp
>   lib/CodeGen/CodeGenPGO.cpp
>   lib/CodeGen/CodeGenPGO.h
>   test/Profile/c-indirect-call.c
>   test/Profile/c-linkage-available_externally.c
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
>
> Index: lib/CodeGen/CGCall.cpp
> ===================================================================
> --- lib/CodeGen/CGCall.cpp
> +++ lib/CodeGen/CGCall.cpp
> @@ -3403,6 +3403,9 @@
>    CS.setAttributes(Attrs);
>    CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
>
> +  // Insert instrumentation or attach profile metadata at indirect call sites
> +  PGO.profileIndirectCallTargets(Builder, Callee, CS.getInstruction());
> +
>    // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
>    // optimizer it can aggressively ignore unwind edges.
>    if (CGM.getLangOpts().ObjCAutoRefCount)
> Index: lib/CodeGen/CodeGenPGO.cpp
> ===================================================================
> --- lib/CodeGen/CodeGenPGO.cpp
> +++ lib/CodeGen/CodeGenPGO.cpp
> @@ -23,6 +23,11 @@
>  #include "llvm/Support/FileSystem.h"
>  #include "llvm/Support/MD5.h"
>
> +static llvm::cl::opt<bool> DisableIndirectCallTargetsProfiling(
> +  "disable-ic-targets-profiling", llvm::cl::ZeroOrMore,
> +  llvm::cl::desc("Disable profiling of indirect calls"),
> +  llvm::cl::init(false));
> +

Don't use cl::opt in clang. This should be a clang driver and/or -cc1
option. I also think it should be opt-in for now, rather than opt-out.

>  using namespace clang;
>  using namespace CodeGen;
>
> @@ -674,6 +679,7 @@
>    if (PGOReader) {
>      SourceManager &SM = CGM.getContext().getSourceManager();
>      loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
> +    loadValueProfilingData(PGOReader, SM.isInMainFile(D->getLocation()));
>      computeRegionCounts(D);
>      applyFunctionAttributes(PGOReader, Fn);
>    }
> @@ -790,12 +796,98 @@
>                        Builder.getInt32(Counter)});
>  }
>
> +static inline std::string getRawFuncName(std::string PrefixedFunctionName) {
> +  // For local symbols, the main file name is prepended to the function name
> +  // to distinguish them.
> +  std::size_t SeparatorPosition = PrefixedFunctionName.find(":");
> +  if (SeparatorPosition == std::string::npos)
> +    return PrefixedFunctionName;
> +  return PrefixedFunctionName.substr(++SeparatorPosition);
> +}

You can't do this. It won't work. The ':' character can appear in names
that aren't prefixed as well (like in objective C).

> +
> +void CodeGenPGO::getIndirectTargetData(unsigned CounterIndex,
> +  uint64_t &Sum, std::multimap<uint64_t, std::string> &OutputData) {

Why a multimap? It seems like what callers of this really want is "The
top <N> targets", right? Maybe this should collect the data, sort it by
most called and return the top <N> given as a parameter?

> +  Sum = 0;
> +  OutputData.clear();
> +  for (unsigned I = 0; I < IndirectTargetResults.size(); ++I) {
> +    llvm::InstrProfValueRecord *Current = &IndirectTargetResults[I];
> +    if (CounterIndex == Current->CounterIndex) {
> +      OutputData.insert(std::pair<uint64_t, std::string>(
> +        Current->NumTaken, getRawFuncName(Current->Name.str())));
> +      Sum += Current->NumTaken;
> +    }
> +  }
> +}
> +
> +// This method either inserts a call to the profile run-time during
> +// instrumentation or puts profile data into metadata for PGO use.
> +void CodeGenPGO::profileIndirectCallTargets(CGBuilderTy &Builder,

I'd probably split the intrinsic generation and profile reading parts of
this into two helper functions - it'll make it a little easier to read.

> +  llvm::Value* Callee, llvm::Instruction *CallSite) {
> +
> +  if (DisableIndirectCallTargetsProfiling)
> +    return;
> +
> +  if (!Callee || !CallSite || !Builder.GetInsertPoint())
> +    return;
> +
> +  bool InstrumentCallSites = CGM.getCodeGenOpts().ProfileInstrGenerate;
> +  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
> +  if (!InstrumentCallSites && !PGOReader)
> +    return;
> +
> +  llvm::Function *DirectCallee = nullptr;
> +  if (const llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Callee))
> +    DirectCallee = dyn_cast<llvm::Function> (CE->getOperand(0));
> +  else DirectCallee = dyn_cast<llvm::Function> (Callee);
> +  if (DirectCallee) return;
> +
> +  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> +  if (InstrumentCallSites && RegionCounterMap) {
> +    auto *I8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
> +    llvm::Value *Args[5] = {
> +      llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
> +      Builder.getInt64(FunctionHash),
> +      Builder.getInt32(llvm::instr_value_prof_kind::indirect_call_target),
> +      Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
> +      Builder.getInt32(NumIndirectCallSites++)
> +    };
> +    Builder.CreateCall(
> +      CGM.getIntrinsic(llvm::Intrinsic::instrprof_instrument_target), Args);
> +  }
> +  if (PGOReader && haveRegionCounts()) {
> +    // We record the top most called five function at each call site.
> +    // Profile metadata contains "indirect_call_targets" string identifying
> +    // this metadata, uint64_t value for the total number of times the call
> +    // is executed, followed by the function names and execution counts
> +    // (uint64_t) of each function.
> +    uint64_t Sum;
> +    std::multimap<uint64_t, std::string> IndirectCallSiteData;
> +    getIndirectTargetData(NumIndirectCallSites, Sum, IndirectCallSiteData);
> +    std::multimap<uint64_t, std::string>::reverse_iterator I =
> +      IndirectCallSiteData.rbegin(), E = IndirectCallSiteData.rend();
> +
> +    llvm::MDBuilder MDHelper(Ctx);
> +    SmallVector<llvm::Metadata*, 2> Vals;
> +    Vals.push_back(MDHelper.createString("indirect_call_targets"));
> +    Vals.push_back(MDHelper.createConstant(
> +      llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), Sum)));
> +    for (unsigned Count = 0; (I != E) && (Count < 5); ++I, ++Count) {
> +      Vals.push_back(MDHelper.createString(I->second));
> +      Vals.push_back(MDHelper.createConstant(
> +        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), I->first)));
> +    }

This iteration will be much simpler with the change to
getIndirectTargetData above.

> +    CallSite->setMetadata(
> +      llvm::LLVMContext::MD_prof, llvm::MDNode::get(Ctx, Vals));
> +    NumIndirectCallSites++;
> +  }
> +}
> +
>  void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
>                                    bool IsInMainFile) {
>    CGM.getPGOStats().addVisited(IsInMainFile);
>    RegionCounts.clear();
>    if (std::error_code EC =
> -          PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
> +         PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {

Arbitrary whitespace change? Please remove this.

>      if (EC == llvm::instrprof_error::unknown_function)
>        CGM.getPGOStats().addMissing(IsInMainFile);
>      else if (EC == llvm::instrprof_error::hash_mismatch)
> @@ -807,6 +899,23 @@
>    }
>  }
>
> +void CodeGenPGO::loadValueProfilingData(
> +  llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile) {
> +  IndirectTargetResults.clear();
> +  if (std::error_code EC = PGOReader->getFunctionValuesForKind(FuncName,
> +      FunctionHash, llvm::instr_value_prof_kind::indirect_call_target,
> +      IndirectTargetResults)) {
> +    if (EC == llvm::instrprof_error::unknown_function)
> +      CGM.getPGOStats().addMissing(IsInMainFile);
> +    else if (EC == llvm::instrprof_error::hash_mismatch)
> +      CGM.getPGOStats().addMismatched(IsInMainFile);
> +    else if (EC == llvm::instrprof_error::malformed)
> +      // TODO: Consider a more specific warning for this case.
> +      CGM.getPGOStats().addMismatched(IsInMainFile);
> +    IndirectTargetResults.clear();
> +  }
> +}
> +
>  /// \brief Calculate what to divide by to scale weights.
>  ///
>  /// Given the maximum weight, calculate a divisor that will scale all the
> Index: lib/CodeGen/CodeGenPGO.h
> ===================================================================
> --- lib/CodeGen/CodeGenPGO.h
> +++ lib/CodeGen/CodeGenPGO.h
> @@ -19,6 +19,7 @@
>  #include "CodeGenTypes.h"
>  #include "clang/Frontend/CodeGenOptions.h"
>  #include "llvm/ADT/StringMap.h"
> +#include "llvm/ProfileData/InstrProfRecord.h"
>  #include "llvm/Support/MemoryBuffer.h"
>  #include <memory>
>
> @@ -32,20 +33,22 @@
>    std::string FuncName;
>    llvm::GlobalVariable *FuncNameVar;
>
> +  unsigned NumIndirectCallSites;
>    unsigned NumRegionCounters;
>    uint64_t FunctionHash;
>    std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
>    std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
>    std::vector<uint64_t> RegionCounts;
> +  std::vector<llvm::InstrProfValueRecord> IndirectTargetResults;
>    uint64_t CurrentRegionCount;
>    /// \brief A flag that is set to true when this function doesn't need
>    /// to have coverage mapping data.
>    bool SkipCoverageMapping;
>
>  public:
>    CodeGenPGO(CodeGenModule &CGM)
> -      : CGM(CGM), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0),
> -        SkipCoverageMapping(false) {}
> +      : CGM(CGM), NumIndirectCallSites(0), NumRegionCounters(0),
> +        FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {}
>
>    /// Whether or not we have PGO region data for the current function. This is
>    /// false both when we have no data at all and when our data has been
> @@ -89,6 +92,9 @@
>    /// for an unused declaration.
>    void emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
>                                 llvm::GlobalValue::LinkageTypes Linkage);
> +  // Insert instrumentation or attach profile metadata at indirect call sites
> +  void profileIndirectCallTargets(CGBuilderTy &Builder, llvm::Value *Callee,
> +    llvm::Instruction *CallSite);
>  private:
>    void setFuncName(llvm::Function *Fn);
>    void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage);
> @@ -99,12 +105,19 @@
>                                 llvm::Function *Fn);
>    void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
>                          bool IsInMainFile);
> +  void loadValueProfilingData(llvm::IndexedInstrProfReader *PGOReader,
> +                              bool IsInMainFile);
>    void emitCounterVariables();
>    void emitCounterRegionMapping(const Decl *D);
>
>  public:
>    void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S);
>
> +  /// Get the indirect target data for the CounterIndex sorted by number of
> +  /// the target addresses called from a particular indirect call site.
> +  void getIndirectTargetData(unsigned CounterIndex, uint64_t &Sum,
> +                             std::multimap<uint64_t, std::string> &OutputData);
> +
>    /// Return the region count for the counter at the given index.
>    uint64_t getRegionCount(const Stmt *S) {
>      if (!RegionCounterMap)
> Index: test/Profile/c-indirect-call.c
> ===================================================================
> --- /dev/null
> +++ test/Profile/c-indirect-call.c
> @@ -0,0 +1,15 @@
> +// Check the data structures emitted by instrumentation.
> +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage.c %s -o - -emit-llvm -fprofile-instr-generate | FileCheck %s
> +
> +void (*foo)(void);
> +
> +int main(void) {
> +// CHECK:  [[REG1:%[0-9]+]] = load void ()*, void ()** @foo, align 8
> +// CHECK-NEXT:  call void [[REG1]]()
> +// CHECK-NEXT:  [[REG2:%[0-9]+]] = ptrtoint void ()* [[REG1]] to i64
> +// CHECK-NEXT:  call void @__llvm_profile_instrument_target(i32 0, i64 [[REG2]], i8* bitcast ({ i32, i32, i64, i8*, i64*, i8*, i32, i8* }* @__llvm_profile_data_main to i8*), i32 0)
> +  foo();
> +  return 0;
> +}
> +
> +// CHECK: declare void @__llvm_profile_instrument_target(i32, i64, i8*, i32)
> Index: test/Profile/c-linkage-available_externally.c
> ===================================================================
> --- test/Profile/c-linkage-available_externally.c
> +++ test/Profile/c-linkage-available_externally.c
> @@ -3,9 +3,8 @@
>  // RUN: %clang_cc1 -O2 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage-available_externally.c %s -o - -emit-llvm -fprofile-instr-generate | FileCheck %s
>
>  // CHECK: @__llvm_profile_name_foo = linkonce_odr hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
> -
>  // CHECK: @__llvm_profile_counters_foo = linkonce_odr hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
> -// CHECK: @__llvm_profile_data_foo = linkonce_odr hidden constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
> +// CHECK: @__llvm_profile_data_foo = linkonce_odr hidden global { i32, i32, i64, i8*, i64*, i8*, i32, i8* } { i32 3, i32 1, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0), i8* bitcast (i32 ()* @foo to i8*), i32 0, i8* null }, section "__DATA,__llvm_prf_data", align 8
>  inline int foo(void) { return 1; }
>
>  int main(void) {



More information about the cfe-commits mailing list