[PATCH] D13308: Value profiling - remaining LLVM changes

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 5 17:56:13 PST 2015


Betul Buyukkurt <betulb at codeaurora.org> writes:
> betulb updated this revision to Diff 37420.
> betulb added a comment.
>
> In this revision:
>
> - 'target' word is removed from the value profiling intrinsic name,
> documentation and from intrinsic lowering code.
> - Raw profile header struct is extended to record the last value of
> the enum for value kinds.

Sorry I took so long to get to this. It looks like it's pretty close.

As usual, please clang-format your changes here.

>
> http://reviews.llvm.org/D13308
>
> Files:
>   docs/LangRef.rst
>   include/llvm/IR/IntrinsicInst.h
>   include/llvm/IR/Intrinsics.td
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>   lib/ProfileData/InstrProfReader.cpp
>   lib/Transforms/Instrumentation/InstrProfiling.cpp
>   test/Instrumentation/InstrProfiling/PR23499.ll
>   test/Instrumentation/InstrProfiling/linkage.ll
>   test/Instrumentation/InstrProfiling/platform.ll
>   test/Instrumentation/InstrProfiling/profiling.ll
>
> Index: test/Instrumentation/InstrProfiling/profiling.ll
> ===================================================================
> --- test/Instrumentation/InstrProfiling/profiling.ll
> +++ test/Instrumentation/InstrProfiling/profiling.ll
> @@ -10,21 +10,21 @@
>  ; CHECK: @baz_prof_name = hidden constant [3 x i8] c"baz", section "__DATA,__llvm_prf_names", align 1
>
>  ; CHECK: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
> -; CHECK: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
> +; CHECK: @__llvm_profile_data_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
>  define void @foo() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
>  }
>
>  ; CHECK: @__llvm_profile_counters_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
> -; CHECK: @__llvm_profile_data_bar = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
> +; CHECK: @__llvm_profile_data_bar = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
>  define void @bar() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_bar, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
>  }
>
>  ; CHECK: @__llvm_profile_counters_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
> -; CHECK: @__llvm_profile_data_baz = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
> +; CHECK: @__llvm_profile_data_baz = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
>  define void @baz() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 0)
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 1)
> Index: test/Instrumentation/InstrProfiling/platform.ll
> ===================================================================
> --- test/Instrumentation/InstrProfiling/platform.ll
> +++ test/Instrumentation/InstrProfiling/platform.ll
> @@ -11,9 +11,10 @@
>  ; MACHO: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
>  ; ELF: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
>
> -; MACHO: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
> -; LINUX: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__llvm_prf_data", align 8
> -; BSD: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__llvm_prf_data", align 8
> +; MACHO: @__llvm_profile_data_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
> +; LINUX: @__llvm_profile_data_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
> +; BSD: @__llvm_profile_data_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
> +
>  define void @foo() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
> Index: test/Instrumentation/InstrProfiling/linkage.ll
> ===================================================================
> --- test/Instrumentation/InstrProfiling/linkage.ll
> +++ test/Instrumentation/InstrProfiling/linkage.ll
> @@ -9,28 +9,28 @@
>  @__llvm_profile_name_foo_inline = linkonce_odr hidden constant [10 x i8] c"foo_inline"
>
>  ; CHECK: @__llvm_profile_counters_foo = hidden global
> -; CHECK: @__llvm_profile_data_foo = hidden constant
> +; CHECK: @__llvm_profile_data_foo = hidden
>  define void @foo() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
>  }
>
>  ; CHECK: @__llvm_profile_counters_foo_weak = weak hidden global
> -; CHECK: @__llvm_profile_data_foo_weak = weak hidden constant
> +; CHECK: @__llvm_profile_data_foo_weak = weak hidden
>  define weak void @foo_weak() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @__llvm_profile_name_foo_weak, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
>  }
>
>  ; CHECK: @"__llvm_profile_counters_linkage.ll:foo_internal" = internal global
> -; CHECK: @"__llvm_profile_data_linkage.ll:foo_internal" = internal constant
> +; CHECK: @"__llvm_profile_data_linkage.ll:foo_internal" = internal
>  define internal void @foo_internal() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @"__llvm_profile_name_linkage.ll:foo_internal", i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
>  }
>
>  ; CHECK: @__llvm_profile_counters_foo_inline = linkonce_odr hidden global
> -; CHECK: @__llvm_profile_data_foo_inline = linkonce_odr hidden constant
> +; CHECK: @__llvm_profile_data_foo_inline = linkonce_odr hidden
>  define linkonce_odr void @foo_inline() {
>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__llvm_profile_name_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0)
>    ret void
> Index: test/Instrumentation/InstrProfiling/PR23499.ll
> ===================================================================
> --- test/Instrumentation/InstrProfiling/PR23499.ll
> +++ test/Instrumentation/InstrProfiling/PR23499.ll
> @@ -10,7 +10,7 @@
>
>  ; CHECK: @__llvm_profile_name__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", section "{{.*}}__llvm_prf_names", comdat($__llvm_profile_vars__Z3barIvEvv), align 1
>  ; CHECK: @__llvm_profile_counters__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__llvm_profile_vars__Z3barIvEvv), align 8
> -; CHECK: @__llvm_profile_data__Z3barIvEvv = linkonce_odr hidden constant { i32, i32, i64, i8*, i64* } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters__Z3barIvEvv, i32 0, i32 0) }, section "{{.*}}__llvm_prf_data", comdat($__llvm_profile_vars__Z3barIvEvv), align 8
> +; CHECK: @__llvm_profile_data__Z3barIvEvv = linkonce_odr hidden global { i32, i32, i64, i8*, i64*, i8*, [1 x i32], i8* } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters__Z3barIvEvv, i32 0, i32 0), i8* null, [1 x i32] zeroinitializer, i8* null }, section "{{.*}}__llvm_prf_data", comdat($__llvm_profile_vars__Z3barIvEvv), align 8

Does dropping the constant off of all of these data sections have an
effect on the binaries we output? Ie, does it increase size or number of
relocations or anything like that?

>
>  declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1
>
> Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
> ===================================================================
> --- lib/Transforms/Instrumentation/InstrProfiling.cpp
> +++ lib/Transforms/Instrumentation/InstrProfiling.cpp
> @@ -7,9 +7,9 @@
>  //
>  //===----------------------------------------------------------------------===//
>  //
> -// This pass lowers instrprof_increment intrinsics emitted by a frontend for
> -// profiling. It also builds the data structures and initialization code needed
> -// for updating execution counts and emitting the profile at runtime.
> +// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
> +// It also builds the data structures and initialization code needed for
> +// updating execution counts and emitting the profile at runtime.
>  //
>  //===----------------------------------------------------------------------===//
>
> @@ -19,6 +19,7 @@
>  #include "llvm/IR/IRBuilder.h"
>  #include "llvm/IR/IntrinsicInst.h"
>  #include "llvm/IR/Module.h"
> +#include "llvm/ProfileData/InstrProf.h"
>  #include "llvm/Transforms/Utils/ModuleUtils.h"
>
>  using namespace llvm;
> @@ -49,7 +50,15 @@
>  private:
>    InstrProfOptions Options;
>    Module *M;
> -  DenseMap<GlobalVariable *, GlobalVariable *> RegionCounters;
> +  typedef struct PerFunctionProfileData {
> +    uint32_t NumValueSites[IPVK_Last+1];
> +    GlobalVariable* RegionCounters;
> +    GlobalVariable* DataVar;
> +    PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) {
> +      memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1));
> +    }

I think it'd be simpler to use a std::array for NumValueSites. This at
least lets us call size() in a couple of places instead of having to
write `IPVK_Last + 1` everywhere.

> +  } PerFunctionProfileData;
> +  DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
>    std::vector<Value *> UsedVars;
>
>    bool isMachO() const {
> @@ -76,6 +85,12 @@
>      return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap";
>    }
>
> +  /// Count the number of instrumented value sites for the function.
> +  void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
> +
> +  /// Replace instrprof_value_profile with a call to runtime library.
> +  void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
> +
>    /// Replace instrprof_increment with an increment of the appropriate value.
>    void lowerIncrement(InstrProfIncrementInst *Inc);
>
> @@ -117,20 +132,39 @@
>    bool MadeChange = false;
>
>    this->M = &M;
> -  RegionCounters.clear();
> +  ProfileDataMap.clear();
>    UsedVars.clear();
>
> +  // We did not know how many value sites there would be inside
> +  // the instrumented function. This is counting the number of instrumented
> +  // target value sites to enter it as field in the profile data variable.

I'd still be happier if the frontend figured this number out and passed
it in. Can you remind me what isn't practical about that?

> +  for (Function &F : M)
> +    for (BasicBlock &BB : F)
> +      for (auto I = BB.begin(), E = BB.end(); I != E;)
> +        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++))
> +          computeNumValueSiteCounts(Ind);
> +
>    for (Function &F : M)
>      for (BasicBlock &BB : F)
>        for (auto I = BB.begin(), E = BB.end(); I != E;)
>          if (auto *Inc = dyn_cast<InstrProfIncrementInst>(I++)) {
>            lowerIncrement(Inc);
>            MadeChange = true;
>          }
> +
> +  for (Function &F : M)
> +    for (BasicBlock &BB : F)
> +      for (auto I = BB.begin(), E = BB.end(); I != E;)
> +        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++)) {
> +          lowerValueProfileInst(Ind);
> +          MadeChange = true;
> +        }
> +

Do we really need to iterate over the basic blocks *three times*?

>    if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) {
>      lowerCoverageData(Coverage);
>      MadeChange = true;
>    }
> +
>    if (!MadeChange)
>      return false;
>
> @@ -141,6 +175,55 @@
>    return true;
>  }
>
> +static Constant *getOrInsertValueProfilingCall(Module &M) {
> +  auto *VoidTy = Type::getVoidTy(M.getContext());
> +  auto *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
> +  auto *Int32Ty = Type::getInt32Ty(M.getContext());
> +  auto *Int64Ty = Type::getInt64Ty(M.getContext());
> +  Type *ArgTypes[] = {Int64Ty, VoidPtrTy, Int32Ty};
> +  auto *ValueProfilingCallTy = FunctionType::get(VoidTy,
> +    makeArrayRef(ArgTypes), false);
> +  return M.getOrInsertFunction("__llvm_profile_instrument_target",
> +    ValueProfilingCallTy);
> +}
> +
> +void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
> +
> +  GlobalVariable *Name = Ind->getName();
> +  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
> +  uint64_t Index = Ind->getIndex()->getZExtValue();
> +  auto It = ProfileDataMap.find(Name);
> +  if (It == ProfileDataMap.end()) {
> +    PerFunctionProfileData PD;
> +    PD.NumValueSites[ValueKind] = Index + 1;
> +    ProfileDataMap[Name] = PD;
> +  }
> +  else if (It->second.NumValueSites[ValueKind] <= Index)
> +    It->second.NumValueSites[ValueKind] = Index + 1;
> +}
> +
> +void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
> +
> +  GlobalVariable *Name = Ind->getName();
> +  auto It = ProfileDataMap.find(Name);
> +  assert(It != ProfileDataMap.end() && It->second.DataVar &&
> +    "value profiling detected in function with no counter incerement");
> +
> +  GlobalVariable *DataVar = It->second.DataVar;
> +  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
> +  uint64_t Index = Ind->getIndex()->getZExtValue();
> +  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
> +    Index += It->second.NumValueSites[Kind];
> +
> +  IRBuilder<> Builder(Ind->getParent(), *Ind);

As we discussed in person, we really shouldn't use getParent() here in
the long run. It'll cause issues when we start doing more optimizations
involving the intrinsics. It's fine for now, since we're also doing this
hack for the COMDATs.

Maybe mark it with a TODO?

> +  Value* Args[3] = {Ind->getTargetValue(),
> +    Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
> +    Builder.getInt32(Index)};
> +  Ind->replaceAllUsesWith(
> +    Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args));
> +  Ind->eraseFromParent();
> +}
> +
>  void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
>    GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
>
> @@ -172,9 +255,10 @@
>      GlobalVariable *Name = cast<GlobalVariable>(V);
>
>      // If we have region counters for this name, we've already handled it.
> -    auto It = RegionCounters.find(Name);
> -    if (It != RegionCounters.end())
> -      continue;
> +    auto It = ProfileDataMap.find(Name);
> +    if (It != ProfileDataMap.end())
> +      if (It->second.RegionCounters)
> +        continue;
>
>      // Move the name variable to the right section.
>      Name->setSection(getNameSection());
> @@ -189,12 +273,25 @@
>    return ("__llvm_profile_" + VarName + "_" + Name).str();
>  }
>
> +static inline bool shouldRecordFunctionAddr(Function *F) {
> +  // Check the linkage
> +  if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
> +      !F->hasAvailableExternallyLinkage())
> +    return true;
> +  // Check uses of this function for other than direct calls or invokes to it.
> +  return F->hasAddressTaken();
> +}
> +
>  GlobalVariable *
>  InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
>    GlobalVariable *Name = Inc->getName();
> -  auto It = RegionCounters.find(Name);
> -  if (It != RegionCounters.end())
> -    return It->second;
> +  auto It = ProfileDataMap.find(Name);
> +  PerFunctionProfileData PD;
> +  if (It != ProfileDataMap.end()) {
> +    if (It->second.RegionCounters)
> +      return It->second.RegionCounters;
> +    PD = It->second;
> +  }
>
>    // Move the name variable to the right section. Place them in a COMDAT group
>    // if the associated function is a COMDAT. This will make sure that
> @@ -221,31 +318,46 @@
>    Counters->setAlignment(8);
>    Counters->setComdat(ProfileVarsComdat);
>
> -  RegionCounters[Inc->getName()] = Counters;
> -
>    // Create data variable.
>    auto *NameArrayTy = Name->getType()->getPointerElementType();
>    auto *Int32Ty = Type::getInt32Ty(Ctx);
>    auto *Int64Ty = Type::getInt64Ty(Ctx);
>    auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
>    auto *Int64PtrTy = Type::getInt64PtrTy(Ctx);
> +  auto *Int32ArrayTy = ArrayType::get(Int32Ty, IPVK_Last+1);
> +
> +  Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ?
> +                           ConstantExpr::getBitCast(Fn, Int8PtrTy) :
> +                           ConstantPointerNull::get(Int8PtrTy);
>
> -  Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy};
> +  Constant *Int32ArrayVals[IPVK_Last+1];
> +  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
> +    Int32ArrayVals[Kind] = ConstantInt::get(Int32Ty, PD.NumValueSites[Kind]);
> +
> +  Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy,
> +    Int8PtrTy, Int32ArrayTy, Int8PtrTy};
>    auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
>    Constant *DataVals[] = {
>        ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()),
>        ConstantInt::get(Int32Ty, NumCounters),
>        ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()),
>        ConstantExpr::getBitCast(Name, Int8PtrTy),
> -      ConstantExpr::getBitCast(Counters, Int64PtrTy)};
> -  auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(),
> +      ConstantExpr::getBitCast(Counters, Int64PtrTy),
> +      FunctionAddr,
> +      ConstantArray::get(Int32ArrayTy, Int32ArrayVals),
> +      ConstantPointerNull::get(Int8PtrTy)};
> +  auto *Data = new GlobalVariable(*M, DataTy, false, Name->getLinkage(),
>                                    ConstantStruct::get(DataTy, DataVals),
>                                    getVarName(Inc, "data"));
>    Data->setVisibility(Name->getVisibility());
>    Data->setSection(getDataSection());
>    Data->setAlignment(8);
>    Data->setComdat(ProfileVarsComdat);
>
> +  PD.RegionCounters = Counters;
> +  PD.DataVar = Data;
> +  ProfileDataMap[Name] = PD;
> +
>    // Mark the data variable as used so that it isn't stripped out.
>    UsedVars.push_back(Data);
>
> @@ -337,7 +449,6 @@
>    LLVMUsed =
>        new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
>                           ConstantArray::get(ATy, MergedVars), "llvm.used");
> -
>    LLVMUsed->setSection("llvm.metadata");
>  }
>
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -218,7 +218,7 @@
>  }
>
>  static uint64_t getRawVersion() {
> -  return 1;
> +  return 2;
>  }
>
>  template <class IntPtrT>
> @@ -232,11 +232,21 @@
>    auto DataSize = swap(Header.DataSize);
>    auto CountersSize = swap(Header.CountersSize);
>    auto NamesSize = swap(Header.NamesSize);
> +  auto PaddingSize = sizeof(uint64_t) - NamesSize % sizeof(uint64_t);
> +  ValueKindLast = swap(Header.ValueKindLast);
> +  auto ValueDataCountsSize = swap(Header.ValueDataCountsSize);
> +  const uint64_t Padding2Size = sizeof(uint64_t) -
> +    (ValueDataCountsSize * sizeof(uint32_t)) % sizeof(uint64_t);
> +  auto ValueDataSize = swap(Header.ValueDataSize);
>
>    ptrdiff_t DataOffset = sizeof(RawHeader);
>    ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
>    ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
> -  size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
> +  ptrdiff_t ValueDataCountsOffset = NamesOffset +
> +    sizeof(char) * (NamesSize + PaddingSize);
> +  ptrdiff_t ValueDataOffset = ValueDataCountsOffset +
> +    sizeof(uint32_t) * ValueDataCountsSize + sizeof(char) * Padding2Size;
> +  size_t ProfileSize = ValueDataOffset + sizeof(ValueData) * ValueDataSize;
>
>    auto *Start = reinterpret_cast<const char *>(&Header);
>    if (Start + ProfileSize > DataBuffer->getBufferEnd())
> @@ -246,14 +256,27 @@
>    DataEnd = Data + DataSize;
>    CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
>    NamesStart = Start + NamesOffset;
> +  CurrentVDataCounts =
> +    reinterpret_cast<const uint32_t *>(Start + ValueDataCountsOffset);
> +  ValueDataStart = reinterpret_cast<const ValueData *>(Start + ValueDataOffset);
> +  CurrentVData = ValueDataStart;
>    ProfileEnd = Start + ProfileSize;
>
> +  FunctionPtrToNameMap.clear();
> +  for (const ProfileData *I = Data; I != DataEnd; ++I) {
> +    StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
> +    const char* NameEntryPtr = StringTable.insertString(FunctionName);
> +    FunctionPtrToNameMap.insert(std::pair<const IntPtrT, const char*>
> +      (swap(I->FunctionPointer), NameEntryPtr));
> +  }
> +
>    return success();
>  }
>
>  template <class IntPtrT>
>  std::error_code
>  RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
> +
>    if (Data == DataEnd)
>      if (std::error_code EC = readNextHeader(ProfileEnd))
>        return EC;
> @@ -284,6 +307,48 @@
>    } else
>      Record.Counts = RawCounts;
>
> +  Record.clearValueData();
> +  if (!Data->Values) {
> +    ++Data;
> +    return success();
> +  }
> +
> +  // Read value data.
> +  for (uint32_t Kind = IPVK_First; Kind <= ValueKindLast; ++Kind) {
> +
> +    uint32_t NumVSites = swap(Data->NumValueSites[Kind]);
> +    if ((CurrentVDataCounts + NumVSites) > (uint32_t *)ValueDataStart)
> +      return error(instrprof_error::malformed);
> +
> +    std::vector<InstrProfValueSiteRecord> &ValueSites =
> +        Record.getValueSitesForKind(Kind);
> +    ValueSites.reserve(NumVSites);
> +    for (uint32_t VSite = 0; VSite < NumVSites; ++VSite) {
> +
> +      uint32_t CurrentVDataCount = swap(*CurrentVDataCounts);
> +      ++CurrentVDataCounts;
> +      if ((const char*)(CurrentVData + CurrentVDataCount) > ProfileEnd)
> +        return error(instrprof_error::malformed);
> +
> +      InstrProfValueSiteRecord NewVRecord;
> +      for (uint32_t VIndex = 0; VIndex < CurrentVDataCount; ++VIndex) {
> +        uint64_t TargetValue = swap(CurrentVData->Value);
> +        if (Kind == IPVK_IndirectCallTarget) {
> +          auto I = FunctionPtrToNameMap.find(TargetValue);
> +          if (I == FunctionPtrToNameMap.end()) {
> +            ++CurrentVData;
> +            continue;
> +          }
> +          TargetValue = (uint64_t)I->second;
> +        }
> +        NewVRecord.ValueData.push_back(
> +           std::make_pair(TargetValue, swap(CurrentVData->NumTaken)));
> +        ++CurrentVData;
> +      }
> +      ValueSites.push_back(NewVRecord);
> +    }
> +  }
> +
>    // Iterate.
>    ++Data;
>    return success();
> @@ -483,6 +548,29 @@
>    return error(instrprof_error::hash_mismatch);
>  }
>
> +std::error_code IndexedInstrProfReader::getFunctionValuesForKind(
> +  StringRef FuncName, uint64_t FuncHash, uint32_t ValueKind,
> +  std::vector<InstrProfValueSiteRecord> &Values) {
> +
> +  auto Iter = Index->find(FuncName);
> +  if (Iter == Index->end())
> +    return error(instrprof_error::unknown_function);
> +
> +  // Found it. Look for counters with the right hash.
> +  ArrayRef<InstrProfRecord> Data = (*Iter);
> +  if (Data.empty())
> +    return error(instrprof_error::malformed);
> +
> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
> +    // Check for a match and fill the vector if there is one.
> +    if (Data[I].Hash == FuncHash) {
> +      Values = Data[I].getValueSitesForKind(ValueKind);
> +      return success();
> +    }
> +  }
> +  return error(instrprof_error::hash_mismatch);
> +}
> +
>  std::error_code
>  IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
>    // Are we out of records?
> Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> @@ -5194,7 +5194,8 @@
>    }
>    case Intrinsic::instrprof_increment:
>      llvm_unreachable("instrprof failed to lower an increment");
> -
> +  case Intrinsic::instrprof_value_profile:
> +    llvm_unreachable("instrprof failed to lower a value profiling call");
>    case Intrinsic::localescape: {
>      MachineFunction &MF = DAG.getMachineFunction();
>      const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -20,10 +20,12 @@
>  #include "llvm/ProfileData/InstrProf.h"
>  #include "llvm/Support/EndianStream.h"
>  #include "llvm/Support/ErrorOr.h"
> +#include "llvm/Support/raw_ostream.h"
>  #include "llvm/Support/LineIterator.h"
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/OnDiskHashTable.h"
>  #include <iterator>
> +#include <map>
>
>  namespace llvm {
>
> @@ -138,6 +140,9 @@
>      const uint64_t FuncHash;
>      const IntPtrT NamePtr;
>      const IntPtrT CounterPtr;
> +    const IntPtrT FunctionPointer;
> +    const uint32_t NumValueSites[IPVK_Last+1];
> +    const IntPtrT Values;
>    };
>    struct RawHeader {
>      const uint64_t Magic;
> @@ -147,6 +152,13 @@
>      const uint64_t NamesSize;
>      const uint64_t CountersDelta;
>      const uint64_t NamesDelta;
> +    const uint64_t ValueKindLast;
> +    const uint64_t ValueDataCountsSize;
> +    const uint64_t ValueDataSize;
> +  };
> +  struct ValueData {
> +    const uint64_t Value;
> +    const uint64_t NumTaken;
>    };
>
>    bool ShouldSwapBytes;
> @@ -157,6 +169,12 @@
>    const uint64_t *CountersStart;
>    const char *NamesStart;
>    const char *ProfileEnd;
> +  uint32_t ValueKindLast;
> +  const ValueData *ValueDataStart;
> +  const uint32_t *CurrentVDataCounts;
> +  const ValueData *CurrentVData;
> +
> +  std::map<const IntPtrT, const char*> FunctionPtrToNameMap;
>
>    RawInstrProfReader(const RawInstrProfReader &) = delete;
>    RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
> @@ -271,6 +289,11 @@
>    /// Fill Counts with the profile data for the given function name.
>    std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
>                                      std::vector<uint64_t> &Counts);
> +  /// Return value profile data for the given function name and hash and
> +  /// value profiling kind
> +  std::error_code getFunctionValuesForKind(StringRef FuncName,
> +      uint64_t FuncHash, uint32_t ValueKind,
> +      std::vector<InstrProfValueSiteRecord> &Values);
>    /// Return the maximum of all known function counts.
>    uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
>
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -85,7 +85,14 @@
>      });
>    }
>
> -  /// Merge data from another InstrProfValueSiteRecord
> +  /// Sort ValueData descending by NumTaken
> +  void sortByNumTaken() {
> +    ValueData.sort([](const ValueDataPair &left, const ValueDataPair &right) {
> +      return left.second > right.second;
> +    });
> +  }
> +
> +  // Merge data from another InstrProfValueSiteRecord
>    void mergeValueData(InstrProfValueSiteRecord &Input) {
>      this->sortByTargetValues();
>      Input.sortByTargetValues();
> @@ -130,6 +137,12 @@
>          const_cast<const InstrProfRecord *>(this)
>              ->getValueSitesForKind(ValueKind));
>    }
> +
> +  // Clear value data entries
> +  void clearValueData() {
> +    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
> +      getValueSitesForKind(Kind).clear();
> +  }
>  };
>
>  } // end namespace llvm
> Index: include/llvm/IR/Intrinsics.td
> ===================================================================
> --- include/llvm/IR/Intrinsics.td
> +++ include/llvm/IR/Intrinsics.td
> @@ -319,7 +319,15 @@
>                                          [llvm_ptr_ty, llvm_i64_ty,
>                                           llvm_i32_ty, llvm_i32_ty],
>                                          []>;
> -
> +
> +// A call to profile runtime for value profiling of target expressions
> +// through instrumentation based profiling.
> +def int_instrprof_value_profile : Intrinsic<[],
> +                                            [llvm_ptr_ty, llvm_i64_ty,
> +                                             llvm_i64_ty, llvm_i32_ty,
> +                                             llvm_i32_ty],
> +                                            []>;
> +
>  //===------------------- Standard C Library Intrinsics --------------------===//
>  //
>
> Index: include/llvm/IR/IntrinsicInst.h
> ===================================================================
> --- include/llvm/IR/IntrinsicInst.h
> +++ include/llvm/IR/IntrinsicInst.h
> @@ -372,6 +372,39 @@
>        return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
>      }
>    };
> -}
> +
> +  /// This represents the llvm.instrprof_value_profile intrinsic.
> +  class InstrProfValueProfileInst : public IntrinsicInst {
> +  public:
> +    static inline bool classof(const IntrinsicInst *I) {
> +      return I->getIntrinsicID() == Intrinsic::instrprof_value_profile;
> +    }
> +    static inline bool classof(const Value *V) {
> +      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
> +    }
> +
> +    GlobalVariable *getName() const {
> +      return cast<GlobalVariable>(
> +          const_cast<Value *>(getArgOperand(0))->stripPointerCasts());
> +    }
> +
> +    ConstantInt *getHash() const {
> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
> +    }
> +
> +    Value *getTargetValue() const {
> +      return cast<Value>(const_cast<Value *>(getArgOperand(2)));
> +    }
> +
> +    ConstantInt *getValueKind() const {
> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
> +    }
> +
> +    // Returns the value site index.
> +    ConstantInt *getIndex() const {
> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
> +    }
> +  };
> +} // namespace llvm
>
>  #endif
> Index: docs/LangRef.rst
> ===================================================================
> --- docs/LangRef.rst
> +++ docs/LangRef.rst
> @@ -9424,6 +9424,55 @@
>  format that can be written out by a compiler runtime and consumed via
>  the ``llvm-profdata`` tool.
>
> +'``llvm.instrprof_value_profile``' Intrinsic
> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> +
> +Syntax:
> +"""""""
> +
> +::
> +
> +      declare void @llvm.instrprof_value_profile(i8* <name>, i64 <hash>,
> +                                                 i64 <value>, i32 <value_kind>,
> +                                                 i32 <index>)
> +
> +Overview:
> +"""""""""
> +
> +The '``llvm.instrprof_value_profile``' intrinsic can be emitted by a
> +frontend for use with instrumentation based profiling. This will be
> +lowered by the ``-instrprof`` pass to find out the target values,
> +instrumented expressions take in a program at runtime.
> +
> +Arguments:
> +""""""""""
> +
> +The first argument is a pointer to a global variable containing the
> +name of the entity being instrumented. ``name`` should generally be the
> +(mangled) function name for a set of counters.
> +
> +The second argument is a hash value that can be used by the consumer
> +of the profile data to detect changes to the instrumented source. It
> +is an error if ``hash`` differs between two instances of
> +``llvm.instrprof_*`` that refer to the same name.
> +
> +The third argument is the value of the expression being profiled. The profiled
> +expression's value should be representable as an unsigned 64-bit value. The
> +fourth argument represents the kind of value profiling that is being done. The
> +supported value profiling kinds are enumerated through the
> +``InstrProfValueKind`` type declared in the
> +``<include/llvm/ProfileData/InstrProf.h>`` header file. The last argument is the
> +index of the instrumented expression within ``name``. It should be >= 0.
> +
> +Semantics:
> +""""""""""
> +
> +This intrinsic represents the point where a call to a runtime routine
> +should be inserted for value profiling of target expressions. ``-instrprof``
> +pass will generate the appropriate data structures and replace the
> +``llvm.instrprof_value_profile`` intrinsic with the call to the profile
> +runtime library with proper arguments.
> +
>  Standard C Library Intrinsics
>  -----------------------------
>


More information about the llvm-commits mailing list