[PATCH] Clang changes for indirect call target profiling
Justin Bogner
mail at justinbogner.com
Thu May 21 16:10:15 PDT 2015
Betul Buyukkurt <betulb at codeaurora.org> writes:
> Updated CL to reflect recent IRBuilder related changes.
>
>
> http://reviews.llvm.org/D8940
>
> Files:
> lib/CodeGen/CGCall.cpp
> lib/CodeGen/CGClass.cpp
> lib/CodeGen/CodeGenPGO.cpp
> lib/CodeGen/CodeGenPGO.h
> test/Profile/c-indirect-call.c
> test/Profile/c-linkage-available_externally.c
>
> EMAIL PREFERENCES
> http://reviews.llvm.org/settings/panel/emailpreferences/
>
> Index: lib/CodeGen/CGCall.cpp
> ===================================================================
> --- lib/CodeGen/CGCall.cpp
> +++ lib/CodeGen/CGCall.cpp
> @@ -3403,6 +3403,9 @@
> CS.setAttributes(Attrs);
> CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
>
> + // Insert instrumentation or attach profile metadata at indirect call sites
> + PGO.profileIndirectCallTargets(Builder, Callee, CS.getInstruction());
> +
> // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
> // optimizer it can aggressively ignore unwind edges.
> if (CGM.getLangOpts().ObjCAutoRefCount)
> Index: lib/CodeGen/CodeGenPGO.cpp
> ===================================================================
> --- lib/CodeGen/CodeGenPGO.cpp
> +++ lib/CodeGen/CodeGenPGO.cpp
> @@ -23,6 +23,11 @@
> #include "llvm/Support/FileSystem.h"
> #include "llvm/Support/MD5.h"
>
> +static llvm::cl::opt<bool> DisableIndirectCallTargetsProfiling(
> + "disable-ic-targets-profiling", llvm::cl::ZeroOrMore,
> + llvm::cl::desc("Disable profiling of indirect calls"),
> + llvm::cl::init(false));
> +
Don't use cl::opt in clang. This should be a clang driver and/or -cc1
option. I also think it should be opt-in for now, rather than opt-out.
> using namespace clang;
> using namespace CodeGen;
>
> @@ -674,6 +679,7 @@
> if (PGOReader) {
> SourceManager &SM = CGM.getContext().getSourceManager();
> loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
> + loadValueProfilingData(PGOReader, SM.isInMainFile(D->getLocation()));
> computeRegionCounts(D);
> applyFunctionAttributes(PGOReader, Fn);
> }
> @@ -790,12 +796,98 @@
> Builder.getInt32(Counter)});
> }
>
> +static inline std::string getRawFuncName(std::string PrefixedFunctionName) {
> + // For local symbols, the main file name is prepended to the function name
> + // to distinguish them.
> + std::size_t SeparatorPosition = PrefixedFunctionName.find(":");
> + if (SeparatorPosition == std::string::npos)
> + return PrefixedFunctionName;
> + return PrefixedFunctionName.substr(++SeparatorPosition);
> +}
You can't do this. It won't work. The ':' character can appear in names
that aren't prefixed as well (like in objective C).
> +
> +void CodeGenPGO::getIndirectTargetData(unsigned CounterIndex,
> + uint64_t &Sum, std::multimap<uint64_t, std::string> &OutputData) {
Why a multimap? It seems like what callers of this really want is "The
top <N> targets", right? Maybe this should collect the data, sort it by
most called and return the top <N> given as a parameter?
> + Sum = 0;
> + OutputData.clear();
> + for (unsigned I = 0; I < IndirectTargetResults.size(); ++I) {
> + llvm::InstrProfValueRecord *Current = &IndirectTargetResults[I];
> + if (CounterIndex == Current->CounterIndex) {
> + OutputData.insert(std::pair<uint64_t, std::string>(
> + Current->NumTaken, getRawFuncName(Current->Name.str())));
> + Sum += Current->NumTaken;
> + }
> + }
> +}
> +
> +// This method either inserts a call to the profile run-time during
> +// instrumentation or puts profile data into metadata for PGO use.
> +void CodeGenPGO::profileIndirectCallTargets(CGBuilderTy &Builder,
I'd probably split the intrinsic generation and profile reading parts of
this into two helper functions - it'll make it a little easier to read.
> + llvm::Value* Callee, llvm::Instruction *CallSite) {
> +
> + if (DisableIndirectCallTargetsProfiling)
> + return;
> +
> + if (!Callee || !CallSite || !Builder.GetInsertPoint())
> + return;
> +
> + bool InstrumentCallSites = CGM.getCodeGenOpts().ProfileInstrGenerate;
> + llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
> + if (!InstrumentCallSites && !PGOReader)
> + return;
> +
> + llvm::Function *DirectCallee = nullptr;
> + if (const llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Callee))
> + DirectCallee = dyn_cast<llvm::Function> (CE->getOperand(0));
> + else DirectCallee = dyn_cast<llvm::Function> (Callee);
> + if (DirectCallee) return;
> +
> + llvm::LLVMContext &Ctx = CGM.getLLVMContext();
> + if (InstrumentCallSites && RegionCounterMap) {
> + auto *I8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
> + llvm::Value *Args[5] = {
> + llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
> + Builder.getInt64(FunctionHash),
> + Builder.getInt32(llvm::instr_value_prof_kind::indirect_call_target),
> + Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
> + Builder.getInt32(NumIndirectCallSites++)
> + };
> + Builder.CreateCall(
> + CGM.getIntrinsic(llvm::Intrinsic::instrprof_instrument_target), Args);
> + }
> + if (PGOReader && haveRegionCounts()) {
> + // We record the top most called five function at each call site.
> + // Profile metadata contains "indirect_call_targets" string identifying
> + // this metadata, uint64_t value for the total number of times the call
> + // is executed, followed by the function names and execution counts
> + // (uint64_t) of each function.
> + uint64_t Sum;
> + std::multimap<uint64_t, std::string> IndirectCallSiteData;
> + getIndirectTargetData(NumIndirectCallSites, Sum, IndirectCallSiteData);
> + std::multimap<uint64_t, std::string>::reverse_iterator I =
> + IndirectCallSiteData.rbegin(), E = IndirectCallSiteData.rend();
> +
> + llvm::MDBuilder MDHelper(Ctx);
> + SmallVector<llvm::Metadata*, 2> Vals;
> + Vals.push_back(MDHelper.createString("indirect_call_targets"));
> + Vals.push_back(MDHelper.createConstant(
> + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), Sum)));
> + for (unsigned Count = 0; (I != E) && (Count < 5); ++I, ++Count) {
> + Vals.push_back(MDHelper.createString(I->second));
> + Vals.push_back(MDHelper.createConstant(
> + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), I->first)));
> + }
This iteration will be much simpler with the change to
getIndirectTargetData above.
> + CallSite->setMetadata(
> + llvm::LLVMContext::MD_prof, llvm::MDNode::get(Ctx, Vals));
> + NumIndirectCallSites++;
> + }
> +}
> +
> void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
> bool IsInMainFile) {
> CGM.getPGOStats().addVisited(IsInMainFile);
> RegionCounts.clear();
> if (std::error_code EC =
> - PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
> + PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
Arbitrary whitespace change? Please remove this.
> if (EC == llvm::instrprof_error::unknown_function)
> CGM.getPGOStats().addMissing(IsInMainFile);
> else if (EC == llvm::instrprof_error::hash_mismatch)
> @@ -807,6 +899,23 @@
> }
> }
>
> +void CodeGenPGO::loadValueProfilingData(
> + llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile) {
> + IndirectTargetResults.clear();
> + if (std::error_code EC = PGOReader->getFunctionValuesForKind(FuncName,
> + FunctionHash, llvm::instr_value_prof_kind::indirect_call_target,
> + IndirectTargetResults)) {
> + if (EC == llvm::instrprof_error::unknown_function)
> + CGM.getPGOStats().addMissing(IsInMainFile);
> + else if (EC == llvm::instrprof_error::hash_mismatch)
> + CGM.getPGOStats().addMismatched(IsInMainFile);
> + else if (EC == llvm::instrprof_error::malformed)
> + // TODO: Consider a more specific warning for this case.
> + CGM.getPGOStats().addMismatched(IsInMainFile);
> + IndirectTargetResults.clear();
> + }
> +}
> +
> /// \brief Calculate what to divide by to scale weights.
> ///
> /// Given the maximum weight, calculate a divisor that will scale all the
> Index: lib/CodeGen/CodeGenPGO.h
> ===================================================================
> --- lib/CodeGen/CodeGenPGO.h
> +++ lib/CodeGen/CodeGenPGO.h
> @@ -19,6 +19,7 @@
> #include "CodeGenTypes.h"
> #include "clang/Frontend/CodeGenOptions.h"
> #include "llvm/ADT/StringMap.h"
> +#include "llvm/ProfileData/InstrProfRecord.h"
> #include "llvm/Support/MemoryBuffer.h"
> #include <memory>
>
> @@ -32,20 +33,22 @@
> std::string FuncName;
> llvm::GlobalVariable *FuncNameVar;
>
> + unsigned NumIndirectCallSites;
> unsigned NumRegionCounters;
> uint64_t FunctionHash;
> std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
> std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
> std::vector<uint64_t> RegionCounts;
> + std::vector<llvm::InstrProfValueRecord> IndirectTargetResults;
> uint64_t CurrentRegionCount;
> /// \brief A flag that is set to true when this function doesn't need
> /// to have coverage mapping data.
> bool SkipCoverageMapping;
>
> public:
> CodeGenPGO(CodeGenModule &CGM)
> - : CGM(CGM), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0),
> - SkipCoverageMapping(false) {}
> + : CGM(CGM), NumIndirectCallSites(0), NumRegionCounters(0),
> + FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {}
>
> /// Whether or not we have PGO region data for the current function. This is
> /// false both when we have no data at all and when our data has been
> @@ -89,6 +92,9 @@
> /// for an unused declaration.
> void emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
> llvm::GlobalValue::LinkageTypes Linkage);
> + // Insert instrumentation or attach profile metadata at indirect call sites
> + void profileIndirectCallTargets(CGBuilderTy &Builder, llvm::Value *Callee,
> + llvm::Instruction *CallSite);
> private:
> void setFuncName(llvm::Function *Fn);
> void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage);
> @@ -99,12 +105,19 @@
> llvm::Function *Fn);
> void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
> bool IsInMainFile);
> + void loadValueProfilingData(llvm::IndexedInstrProfReader *PGOReader,
> + bool IsInMainFile);
> void emitCounterVariables();
> void emitCounterRegionMapping(const Decl *D);
>
> public:
> void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S);
>
> + /// Get the indirect target data for the CounterIndex sorted by number of
> + /// the target addresses called from a particular indirect call site.
> + void getIndirectTargetData(unsigned CounterIndex, uint64_t &Sum,
> + std::multimap<uint64_t, std::string> &OutputData);
> +
> /// Return the region count for the counter at the given index.
> uint64_t getRegionCount(const Stmt *S) {
> if (!RegionCounterMap)
> Index: test/Profile/c-indirect-call.c
> ===================================================================
> --- /dev/null
> +++ test/Profile/c-indirect-call.c
> @@ -0,0 +1,15 @@
> +// Check the data structures emitted by instrumentation.
> +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage.c %s -o - -emit-llvm -fprofile-instr-generate | FileCheck %s
> +
> +void (*foo)(void);
> +
> +int main(void) {
> +// CHECK: [[REG1:%[0-9]+]] = load void ()*, void ()** @foo, align 8
> +// CHECK-NEXT: call void [[REG1]]()
> +// CHECK-NEXT: [[REG2:%[0-9]+]] = ptrtoint void ()* [[REG1]] to i64
> +// CHECK-NEXT: call void @__llvm_profile_instrument_target(i32 0, i64 [[REG2]], i8* bitcast ({ i32, i32, i64, i8*, i64*, i8*, i32, i8* }* @__llvm_profile_data_main to i8*), i32 0)
> + foo();
> + return 0;
> +}
> +
> +// CHECK: declare void @__llvm_profile_instrument_target(i32, i64, i8*, i32)
> Index: test/Profile/c-linkage-available_externally.c
> ===================================================================
> --- test/Profile/c-linkage-available_externally.c
> +++ test/Profile/c-linkage-available_externally.c
> @@ -3,9 +3,8 @@
> // RUN: %clang_cc1 -O2 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage-available_externally.c %s -o - -emit-llvm -fprofile-instr-generate | FileCheck %s
>
> // CHECK: @__llvm_profile_name_foo = linkonce_odr hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
> -
> // CHECK: @__llvm_profile_counters_foo = linkonce_odr hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
> -// CHECK: @__llvm_profile_data_foo = linkonce_odr hidden constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
> +// CHECK: @__llvm_profile_data_foo = linkonce_odr hidden global { i32, i32, i64, i8*, i64*, i8*, i32, i8* } { i32 3, i32 1, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0), i8* bitcast (i32 ()* @foo to i8*), i32 0, i8* null }, section "__DATA,__llvm_prf_data", align 8
> inline int foo(void) { return 1; }
>
> int main(void) {
More information about the cfe-commits
mailing list