[llvm] r249724 - Re-apply r249644: Handle inline stacks in gcov-encoded sample profiles.

Diego Novillo via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 8 12:40:38 PDT 2015


Author: dnovillo
Date: Thu Oct  8 14:40:37 2015
New Revision: 249724

URL: http://llvm.org/viewvc/llvm-project?rev=249724&view=rev
Log:
Re-apply r249644: Handle inline stacks in gcov-encoded sample profiles.

This fixes memory allocation problems by making the merge operation keep
the profile readers around until the merged profile has been emitted.
This is needed to prevent the inlined function names from disappearing
from the function profiles. Since all the names are kept as references
into the reader's memory, once the reader is destroyed, the names are
also deallocated.

Additionally, XFAIL on big-endian architectures. The test case uses a
gcov file generated on a little-endian system.

Added:
    llvm/trunk/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov
    llvm/trunk/test/tools/llvm-profdata/gcc-gcov-sample-profile.test
Modified:
    llvm/trunk/include/llvm/ProfileData/SampleProf.h
    llvm/trunk/include/llvm/ProfileData/SampleProfReader.h
    llvm/trunk/include/llvm/ProfileData/SampleProfWriter.h
    llvm/trunk/lib/ProfileData/SampleProfReader.cpp
    llvm/trunk/lib/ProfileData/SampleProfWriter.cpp
    llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp

Modified: llvm/trunk/include/llvm/ProfileData/SampleProf.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/SampleProf.h?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/SampleProf.h (original)
+++ llvm/trunk/include/llvm/ProfileData/SampleProf.h Thu Oct  8 14:40:37 2015
@@ -208,7 +208,7 @@ typedef DenseMap<CallsiteLocation, Funct
 class FunctionSamples {
 public:
   FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {}
-  void print(raw_ostream &OS = dbgs());
+  void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
   void addTotalSamples(unsigned Num) { TotalSamples += Num; }
   void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
   void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) {
@@ -302,6 +302,22 @@ private:
   /// are an offset from the start of the function.
   BodySampleMap BodySamples;
 
+  /// Map call sites to collected samples for the called function.
+  ///
+  /// Each entry in this map corresponds to all the samples
+  /// collected for the inlined function call at the given
+  /// location. For example, given:
+  ///
+  ///     void foo() {
+  ///  1    bar();
+  ///  ...
+  ///  8    baz();
+  ///     }
+  ///
+  /// If the bar() and baz() calls were inlined inside foo(), this
+  /// map will contain two entries.  One for all the samples collected
+  /// in the call to bar() at line offset 1, the other for all the samples
+  /// collected in the call to baz() at line offset 8.
   CallsiteSampleMap CallsiteSamples;
 };
 

Modified: llvm/trunk/include/llvm/ProfileData/SampleProfReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/SampleProfReader.h?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/SampleProfReader.h (original)
+++ llvm/trunk/include/llvm/ProfileData/SampleProfReader.h Thu Oct  8 14:40:37 2015
@@ -189,7 +189,7 @@ struct SourceInfo {
   uint32_t Discriminator;
 };
 
-typedef std::vector<SourceInfo> SourceStack;
+typedef SmallVector<FunctionSamples *, 10> InlineCallStack;
 
 // Supported histogram types in GCC.  Currently, we only need support for
 // call target histograms.
@@ -220,9 +220,8 @@ public:
 
 protected:
   std::error_code readNameTable();
-  std::error_code addSourceCount(StringRef Name, const SourceStack &Src,
-                                 uint64_t Count);
-  std::error_code readOneFunctionProfile(const SourceStack &Stack, bool Update);
+  std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
+                                         bool Update, uint32_t Offset);
   std::error_code readFunctionProfiles();
   std::error_code skipNextWord();
   template <typename T> ErrorOr<T> readNumber();

Modified: llvm/trunk/include/llvm/ProfileData/SampleProfWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/SampleProfWriter.h?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/SampleProfWriter.h (original)
+++ llvm/trunk/include/llvm/ProfileData/SampleProfWriter.h Thu Oct  8 14:40:37 2015
@@ -84,12 +84,18 @@ protected:
 class SampleProfileWriterText : public SampleProfileWriter {
 public:
   SampleProfileWriterText(StringRef F, std::error_code &EC)
-      : SampleProfileWriter(F, EC, sys::fs::F_Text) {}
+      : SampleProfileWriter(F, EC, sys::fs::F_Text), Indent(0) {}
 
   bool write(StringRef FName, const FunctionSamples &S) override;
   bool write(const Module &M, StringMap<FunctionSamples> &P) {
     return SampleProfileWriter::write(M, P);
   }
+
+private:
+  /// Indent level to use when writing.
+  ///
+  /// This is used when printing inlined callees.
+  unsigned Indent;
 };
 
 /// \brief Sample-based profile writer (binary format).

Modified: llvm/trunk/lib/ProfileData/SampleProfReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/SampleProfReader.cpp?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/SampleProfReader.cpp (original)
+++ llvm/trunk/lib/ProfileData/SampleProfReader.cpp Thu Oct  8 14:40:37 2015
@@ -132,13 +132,14 @@ using namespace llvm;
 /// \brief Print the samples collected for a function on stream \p OS.
 ///
 /// \param OS Stream to emit the output to.
-void FunctionSamples::print(raw_ostream &OS) {
+void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
   OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
      << " sampled lines\n";
   for (const auto &SI : BodySamples) {
     LineLocation Loc = SI.first;
     const SampleRecord &Sample = SI.second;
-    OS << "\tline offset: " << Loc.LineOffset
+    OS.indent(Indent);
+    OS << "line offset: " << Loc.LineOffset
        << ", discriminator: " << Loc.Discriminator
        << ", number of samples: " << Sample.getSamples();
     if (Sample.hasCalls()) {
@@ -148,7 +149,15 @@ void FunctionSamples::print(raw_ostream
     }
     OS << "\n";
   }
-  OS << "\n";
+  for (const auto &CS : CallsiteSamples) {
+    CallsiteLocation Loc = CS.first;
+    const FunctionSamples &CalleeSamples = CS.second;
+    OS.indent(Indent);
+    OS << "line offset: " << Loc.LineOffset
+       << ", discriminator: " << Loc.Discriminator
+       << ", inlined callee: " << Loc.CalleeName << ": ";
+    CalleeSamples.print(OS, Indent + 2);
+  }
 }
 
 /// \brief Dump the function profile for \p FName.
@@ -266,7 +275,7 @@ static bool ParseLine(const StringRef &I
 std::error_code SampleProfileReaderText::read() {
   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
 
-  SmallVector<FunctionSamples *, 10> InlineStack;
+  InlineCallStack InlineStack;
 
   for (; !LineIt.is_at_eof(); ++LineIt) {
     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
@@ -559,31 +568,18 @@ std::error_code SampleProfileReaderGCC::
   if (!GcovBuffer.readInt(NumFunctions))
     return sampleprof_error::truncated;
 
-  SourceStack Stack;
+  InlineCallStack Stack;
   for (uint32_t I = 0; I < NumFunctions; ++I)
-    if (std::error_code EC = readOneFunctionProfile(Stack, true))
+    if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
       return EC;
 
   return sampleprof_error::success;
 }
 
-std::error_code SampleProfileReaderGCC::addSourceCount(StringRef Name,
-                                                       const SourceStack &Src,
-                                                       uint64_t Count) {
-  if (Src.size() == 0 || Src[0].Malformed())
-    return sampleprof_error::malformed;
-  FunctionSamples &FProfile = Profiles[Name];
-  FProfile.addTotalSamples(Count);
-  // FIXME(dnovillo) - Properly update inline stack for FnName.
-  FProfile.addBodySamples(Src[0].Line, Src[0].Discriminator, Count);
-  return sampleprof_error::success;
-}
-
-std::error_code
-SampleProfileReaderGCC::readOneFunctionProfile(const SourceStack &Stack,
-                                               bool Update) {
+std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
+    const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
   uint64_t HeadCount = 0;
-  if (Stack.size() == 0)
+  if (InlineStack.size() == 0)
     if (!GcovBuffer.readInt64(HeadCount))
       return sampleprof_error::truncated;
 
@@ -597,15 +593,31 @@ SampleProfileReaderGCC::readOneFunctionP
   if (!GcovBuffer.readInt(NumPosCounts))
     return sampleprof_error::truncated;
 
-  uint32_t NumCallSites;
-  if (!GcovBuffer.readInt(NumCallSites))
+  uint32_t NumCallsites;
+  if (!GcovBuffer.readInt(NumCallsites))
     return sampleprof_error::truncated;
 
-  if (Stack.size() == 0) {
-    FunctionSamples &FProfile = Profiles[Name];
-    FProfile.addHeadSamples(HeadCount);
-    if (FProfile.getTotalSamples() > 0)
+  FunctionSamples *FProfile = nullptr;
+  if (InlineStack.size() == 0) {
+    // If this is a top function that we have already processed, do not
+    // update its profile again.  This happens in the presence of
+    // function aliases.  Since these aliases share the same function
+    // body, there will be identical replicated profiles for the
+    // original function.  In this case, we simply do not bother updating
+    // the profile of the original function.
+    FProfile = &Profiles[Name];
+    FProfile->addHeadSamples(HeadCount);
+    if (FProfile->getTotalSamples() > 0)
       Update = false;
+  } else {
+    // Otherwise, we are reading an inlined instance. The top of the
+    // inline stack contains the profile of the caller. Insert this
+    // callee in the caller's CallsiteMap.
+    FunctionSamples *CallerProfile = InlineStack.front();
+    uint32_t LineOffset = Offset >> 16;
+    uint32_t Discriminator = Offset & 0xffff;
+    FProfile = &CallerProfile->functionSamplesAt(
+        CallsiteLocation(LineOffset, Discriminator, Name));
   }
 
   for (uint32_t I = 0; I < NumPosCounts; ++I) {
@@ -621,13 +633,28 @@ SampleProfileReaderGCC::readOneFunctionP
     if (!GcovBuffer.readInt64(Count))
       return sampleprof_error::truncated;
 
-    SourceInfo Info(Name, "", "", 0, Offset >> 16, Offset & 0xffff);
-    SourceStack NewStack;
-    NewStack.push_back(Info);
-    NewStack.insert(NewStack.end(), Stack.begin(), Stack.end());
-    if (Update)
-      addSourceCount(NewStack[NewStack.size() - 1].FuncName, NewStack, Count);
+    // The line location is encoded in the offset as:
+    //   high 16 bits: line offset to the start of the function.
+    //   low 16 bits: discriminator.
+    uint32_t LineOffset = Offset >> 16;
+    uint32_t Discriminator = Offset & 0xffff;
+
+    InlineCallStack NewStack;
+    NewStack.push_back(FProfile);
+    NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+    if (Update) {
+      // Walk up the inline stack, adding the samples on this line to
+      // the total sample count of the callers in the chain.
+      for (auto CallerProfile : NewStack)
+        CallerProfile->addTotalSamples(Count);
+
+      // Update the body samples for the current profile.
+      FProfile->addBodySamples(LineOffset, Discriminator, Count);
+    }
 
+    // Process the list of functions called at an indirect call site.
+    // These are all the targets that a function pointer (or virtual
+    // function) resolved to at runtime.
     for (uint32_t J = 0; J < NumTargets; J++) {
       uint32_t HistVal;
       if (!GcovBuffer.readInt(HistVal))
@@ -647,24 +674,25 @@ SampleProfileReaderGCC::readOneFunctionP
 
       if (Update) {
         FunctionSamples &TargetProfile = Profiles[TargetName];
-        TargetProfile.addBodySamples(NewStack[0].Line,
-                                     NewStack[0].Discriminator, TargetCount);
+        TargetProfile.addCalledTargetSamples(LineOffset, Discriminator,
+                                             TargetName, TargetCount);
       }
     }
   }
 
-  for (uint32_t I = 0; I < NumCallSites; I++) {
+  // Process all the inlined callees of the current function. These
+  // are all the callsites that were inlined into this function.
+  for (uint32_t I = 0; I < NumCallsites; I++) {
     // The offset is encoded as:
     //   high 16 bits: line offset to the start of the function.
     //   low 16 bits: discriminator.
     uint32_t Offset;
     if (!GcovBuffer.readInt(Offset))
       return sampleprof_error::truncated;
-    SourceInfo Info(Name, "", "", 0, Offset >> 16, Offset & 0xffff);
-    SourceStack NewStack;
-    NewStack.push_back(Info);
-    NewStack.insert(NewStack.end(), Stack.begin(), Stack.end());
-    if (std::error_code EC = readOneFunctionProfile(NewStack, Update))
+    InlineCallStack NewStack;
+    NewStack.push_back(FProfile);
+    NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+    if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
       return EC;
   }
 

Modified: llvm/trunk/lib/ProfileData/SampleProfWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/SampleProfWriter.cpp?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/SampleProfWriter.cpp (original)
+++ llvm/trunk/lib/ProfileData/SampleProfWriter.cpp Thu Oct  8 14:40:37 2015
@@ -31,15 +31,15 @@ using namespace llvm;
 
 /// \brief Write samples to a text file.
 bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) {
-  if (S.empty())
-    return true;
-
-  OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples()
-     << "\n";
+  OS << FName << ":" << S.getTotalSamples();
+  if (Indent == 0)
+    OS << ":" << S.getHeadSamples();
+  OS << "\n";
 
   for (const auto &I : S.getBodySamples()) {
     LineLocation Loc = I.first;
     const SampleRecord &Sample = I.second;
+    OS.indent(Indent + 1);
     if (Loc.Discriminator == 0)
       OS << Loc.LineOffset << ": ";
     else
@@ -52,6 +52,19 @@ bool SampleProfileWriterText::write(Stri
     OS << "\n";
   }
 
+  Indent += 1;
+  for (const auto &I : S.getCallsiteSamples()) {
+    CallsiteLocation Loc = I.first;
+    const FunctionSamples &CalleeSamples = I.second;
+    OS.indent(Indent);
+    if (Loc.Discriminator == 0)
+      OS << Loc.LineOffset << ": ";
+    else
+      OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
+    write(Loc.CalleeName, CalleeSamples);
+  }
+  Indent -= 1;
+
   return true;
 }
 

Added: llvm/trunk/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov?rev=249724&view=auto
==============================================================================
Binary files llvm/trunk/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov (added) and llvm/trunk/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov Thu Oct  8 14:40:37 2015 differ

Added: llvm/trunk/test/tools/llvm-profdata/gcc-gcov-sample-profile.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-profdata/gcc-gcov-sample-profile.test?rev=249724&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-profdata/gcc-gcov-sample-profile.test (added)
+++ llvm/trunk/test/tools/llvm-profdata/gcc-gcov-sample-profile.test Thu Oct  8 14:40:37 2015
@@ -0,0 +1,29 @@
+The input gcov file has been generated on a little endian machine. Expect
+failures on big endian systems.
+
+XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
+
+Tests for sample profiles encoded in GCC's gcov format.
+
+1- Show all functions. This profile has a single main() function with several
+   inlined callees.
+RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov | FileCheck %s --check-prefix=SHOW1
+SHOW1: Function: main: 364084, 0, 6 sampled lines
+SHOW1: line offset: 2, discriminator: 3, inlined callee: _Z3fool: 243786, 0, 3 sampled lines
+SHOW1:   line offset: 1, discriminator: 3, inlined callee: _Z3bari: 0, 0, 2 sampled lines
+SHOW1:   line offset: 1, discriminator: 8, inlined callee: _Z3bari: 0, 0, 2 sampled lines
+SHOW1:   line offset: 1, discriminator: 7, inlined callee: _Z3bari: 98558, 0, 2 sampled lines
+
+2- Convert the profile to text encoding and check that they are both
+   identical.
+RUN: llvm-profdata merge --sample %p/Inputs/gcc-sample-profile.gcov --text -o - | llvm-profdata show --sample - -o %t-text
+RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov -o %t-gcov
+RUN: diff %t-text %t-gcov
+
+3- Merge the gcov and text encodings of the profile and check that the
+   counters have doubled.
+RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov -o %t-gcov
+RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov %t-gcov -o - | FileCheck %s --check-prefix=MERGE1
+MERGE1: main:728168:0
+MERGE1: 2.3: 120298
+MERGE1: 2.3: _Z3fool:487572

Modified: llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp?rev=249724&r1=249723&r2=249724&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp (original)
+++ llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp Thu Oct  8 14:40:37 2015
@@ -77,13 +77,19 @@ static void mergeSampleProfile(const cl:
 
   auto Writer = std::move(WriterOrErr.get());
   StringMap<FunctionSamples> ProfileMap;
+  SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
   for (const auto &Filename : Inputs) {
     auto ReaderOrErr =
         SampleProfileReader::create(Filename, getGlobalContext());
     if (std::error_code EC = ReaderOrErr.getError())
       exitWithError(EC.message(), Filename);
 
-    auto Reader = std::move(ReaderOrErr.get());
+    // We need to keep the readers around until after all the files are
+    // read so that we do not lose the function names stored in each
+    // reader's memory. The function names are needed to write out the
+    // merged profile map.
+    Readers.push_back(std::move(ReaderOrErr.get()));
+    const auto Reader = Readers.back().get();
     if (std::error_code EC = Reader->read())
       exitWithError(EC.message(), Filename);
 




More information about the llvm-commits mailing list