[PATCH] Introduce the InstrProfReader interface and a text reader

Wed Mar 19 16:06:35 PDT 2014

On Mar 19, 2014, at 3:52 PM, Justin Bogner <mail at justinbogner.com> wrote:

> This patch introduces the ProfileData library without going into details
> about the data format that instrumentation based profiling should use,
> instead just sticking to the naive text format we have now.
> 
> This is designed to make readers for multiple formats simple, so that a
> "raw" format for compiler-rt to commit and an efficient format for clang
> to consume is easy.
> 
> Eric, would you mind checking the build system changes? It builds with
> both clang and autoconf for me, but I'd appreciate a look over by
> someone who has a clue how our build works.
> 
> <TextInstrProfReader.patch>

LGTM, with a couple of fixups.

But I don’t understand our build system either, so you should wait for
someone to check that over.

> diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
> new file mode 100644
> index 0000000..fb09c75
> --- /dev/null
> +++ b/include/llvm/ProfileData/InstrProfReader.h
> @@ -0,0 +1,118 @@
> +//=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for reading profiling data for instrumentation
> +// based PGO and coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_PROFILEDATA_INSTRPROF_READER_H__
> +#define LLVM_PROFILEDATA_INSTRPROF_READER_H__
> +
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/LineIterator.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +
> +#include <iterator>
> +
> +namespace llvm {
> +
> +class InstrProfReader;
> +
> +/// Profiling information for a single function.
> +struct InstrProfRecord {
> +  StringRef Name;
> +  uint64_t Hash;
> +  ArrayRef<uint64_t> Counts;
> +};
> +
> +/// A file format agnostic iterator over profiling data.
> +class InstrProfIterator : public std::iterator<std::input_iterator_tag,
> +                                               InstrProfRecord> {
> +  InstrProfReader *Reader;
> +  InstrProfRecord Record;
> +
> +  void Increment();
> +public:
> +  InstrProfIterator() : Reader(nullptr) {}
> +  InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
> +
> +  InstrProfIterator &operator++() { Increment(); return *this; }
> +  bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
> +  bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
> +  InstrProfRecord &operator*() { return Record; }
> +  InstrProfRecord *operator->() { return &Record; }
> +};
> +
> +/// Base class and interface for reading profiling data of any known instrprof
> +/// format. Provides an iterator over InstrProfRecords.
> +class InstrProfReader {
> +  error_code LastError;
> +public:
> +  InstrProfReader() : LastError(instrprof_error::success) {}
> +  virtual ~InstrProfReader() {}
> +
> +  /// Read a single record.
> +  virtual error_code readNextRecord(InstrProfRecord &Record) = 0;
> +  /// Iterator over profile data.
> +  virtual InstrProfIterator begin() { return InstrProfIterator(this); }
> +  virtual InstrProfIterator end() { return InstrProfIterator(); }

Do these need to be virtual?

> +
> +  /// Set the current error_code and return same.
> +  error_code error(error_code EC) {
> +    LastError = EC;
> +    return EC;
> +  }
> +  /// Clear the current error code and return a successful one.
> +  error_code success() { return error(instrprof_error::success); }
> +
> +  /// Return true if the reader has finished reading the profile data.
> +  bool isEOF() { return LastError == instrprof_error::eof; }
> +  /// Return true if the reader encountered an error reading profiling data.
> +  bool hasError() { return LastError && !isEOF(); }
> +  /// Get the current error code.
> +  error_code getError() { return LastError; }
> +
> +  /// Factory method to create an appropriately typed reader for the given
> +  /// instrprof file.
> +  static error_code create(std::string Path,
> +                           std::unique_ptr<InstrProfReader> &Result);
> +};
> +
> +/// Reader for the simple text based instrprof format.
> +///
> +/// This format is a simple text format that's suitable for test data. Records
> +/// are separated by one or more blank lines, and record fields are separated by
> +/// new lines.
> +///
> +/// Each record consists of a function name, a function hash, a number of
> +/// counters, and then each counter value, in that order.
> +class TextInstrProfReader : public InstrProfReader {
> +private:
> +  /// The profile data file contents.
> +  std::unique_ptr<MemoryBuffer> DataBuffer;
> +  /// Iterator over the profile data.
> +  line_iterator Line;
> +  /// The current set of counter values.
> +  std::vector<uint64_t> Counts;
> +
> +  TextInstrProfReader(const TextInstrProfReader &) LLVM_DELETED_FUNCTION;
> +  TextInstrProfReader &operator=(const TextInstrProfReader &)
> +    LLVM_DELETED_FUNCTION;
> +public:
> +  TextInstrProfReader(std::unique_ptr<MemoryBuffer> &DataBuffer_)
> +      : DataBuffer(DataBuffer_.release()), Line(*DataBuffer, '#') {}
> +
> +  error_code readNextRecord(InstrProfRecord &Record) override;
> +};
> +
> +} // end namespace llvm
> +

> diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
> new file mode 100644
> index 0000000..329abf8
> --- /dev/null
> +++ b/lib/ProfileData/InstrProf.cpp
> @@ -0,0 +1,56 @@
> +//=-- InstrProf.cpp - Instrumented profiling format support -----------------=//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for clang's instrumentation based PGO and
> +// coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/ErrorHandling.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +class InstrProfErrorCategoryType : public error_category {
> +  const char *name() const override { return "llvm.instrprof"; }
> +  std::string message(int IE) const override {
> +    instrprof_error::ErrorType E = static_cast<instrprof_error::ErrorType>(IE);
> +    switch (E) {
> +    case instrprof_error::success:
> +      return "Success";
> +    case instrprof_error::eof:
> +      return "End of File";
> +    case instrprof_error::bad_magic:
> +      return "Invalid file format (bad magic)";
> +    case instrprof_error::unsupported_version:
> +      return "Unsupported format version";
> +    case instrprof_error::too_large:
> +      return "Too much profile data";
> +    case instrprof_error::truncated:
> +      return "Truncated profile data";
> +    case instrprof_error::malformed:
> +      return "Malformed profile data";
> +    case instrprof_error::unknown_function:
> +      return "No profile data available for function";
> +    }
> +    llvm_unreachable("A value of instrprof_error has no message.");
> +  }
> +  error_condition default_error_condition(int EV) const {
> +    if (EV == instrprof_error::success)
> +      return errc::success;
> +    return errc::invalid_argument;
> +  }
> +};
> +}
> +
> +const error_category &llvm::instrprof_category() {
> +  static InstrProfErrorCategoryType C;
> +  return C;
> +}
> diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
> new file mode 100644
> index 0000000..8b36000
> --- /dev/null
> +++ b/lib/ProfileData/InstrProfReader.cpp
> @@ -0,0 +1,84 @@
> +//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for reading profiling data for clang's
> +// instrumentation based PGO and coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ProfileData/InstrProfReader.h"
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/Endian.h"
> +
> +#include <cassert>
> +
> +using namespace llvm;
> +
> +error_code InstrProfReader::create(std::string Path,
> +                                   std::unique_ptr<InstrProfReader> &Result) {
> +  std::unique_ptr<MemoryBuffer> Buffer;
> +  if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
> +    return EC;
> +
> +  // Sanity check the file.
> +  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
> +    return instrprof_error::too_large;
> +
> +  // FIXME: This needs to determine which format the file is and construct the
> +  // correct subclass.
> +  Result.reset(new TextInstrProfReader(Buffer));
> +
> +  return instrprof_error::success;
> +}
> +
> +void InstrProfIterator::Increment() {
> +  if (Reader->readNextRecord(Record))
> +    *this = InstrProfIterator();
> +}
> +
> +error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
> +  // Skip empty lines.
> +  while (!Line.is_at_end() && Line->empty())
> +    ++Line;
> +  // If we hit EOF while looking for a name, we're done.
> +  if (Line.is_at_end())
> +    return error(instrprof_error::eof);
> +
> +  // Read the function name.
> +  Record.Name = *Line++;
> +
> +  // Read the function hash.
> +  if (Line.is_at_end())
> +    return error(instrprof_error::truncated);
> +  if ((Line++)->getAsInteger(10, Record.Hash))
> +    return error(instrprof_error::malformed);
> +
> +  // Read the number of counters.
> +  uint64_t NumCounters;
> +  if (Line.is_at_end())
> +    return error(instrprof_error::truncated);
> +  if ((Line++)->getAsInteger(10, NumCounters))
> +    return error(instrprof_error::malformed);
> +
> +  // Read each counter and fill our internal storage with the values.
> +  Counts.clear();
> +  Counts.reserve(NumCounters);
> +  for (uint64_t I = 0; I < NumCounters; ++I) {
> +    if (Line.is_at_end())
> +      return error(instrprof_error::truncated);
> +    uint64_t Count;
> +    if ((Line++)->getAsInteger(10, Count))
> +      return error(instrprof_error::malformed);
> +    Counts.push_back(Count);
> +  }
> +  // Give the record a reference to our internal counter storage.
> +  Record.Counts = Counts;
> +
> +  return success();
> +}

> diff --git a/test/tools/llvm-profdata/errors.test b/test/tools/llvm-profdata/errors.test
> index 219d88d..afac7a4 100644
> --- a/test/tools/llvm-profdata/errors.test
> +++ b/test/tools/llvm-profdata/errors.test
> @@ -1,19 +1,19 @@
>  RUN: not llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
>  RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
>  RUN: not llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/empty.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
> -LENGTH: error: {{.*}}: truncated file
> +LENGTH: error: Number of instrumented functions differ
>  
>  RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=NAME
> -NAME: error: {{.*}}: function name mismatch
> +NAME: error: Function name mismatch, foo != bar
>  
>  RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata 2>&1 | FileCheck %s --check-prefix=HASH
> -HASH: error: {{.*}}: function hash mismatch
> +HASH: error: Function hash mismatch for foo
>  
>  RUN: not llvm-profdata merge %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata 2>&1 | FileCheck %s --check-prefix=OVERFLOW
> -OVERFLOW: error: {{.*}}: counter overflow
> +OVERFLOW: error: Counter overflow for overflow
>  
>  RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
> -INVALID-COUNT-LATER: error: {{.*}}: invalid counter
> +INVALID-COUNT-LATER: error: {{.*}}: Malformed profile data
>  
>  RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
> -BAD-HASH: error: {{.*}}: bad function hash
> +BAD-HASH: error: {{.*}}: Malformed profile data
> diff --git a/tools/llvm-profdata/CMakeLists.txt b/tools/llvm-profdata/CMakeLists.txt
> index 4b1357d..4c64cfa 100644
> --- a/tools/llvm-profdata/CMakeLists.txt
> +++ b/tools/llvm-profdata/CMakeLists.txt
> @@ -1,4 +1,4 @@
> -set(LLVM_LINK_COMPONENTS core support )
> +set(LLVM_LINK_COMPONENTS core profiledata support )
>  
>  add_llvm_tool(llvm-profdata
>    llvm-profdata.cpp
> diff --git a/tools/llvm-profdata/LLVMBuild.txt b/tools/llvm-profdata/LLVMBuild.txt
> index fc9e469..bbc1ea6 100644
> --- a/tools/llvm-profdata/LLVMBuild.txt
> +++ b/tools/llvm-profdata/LLVMBuild.txt
> @@ -19,4 +19,4 @@
>  type = Tool
>  name = llvm-profdata
>  parent = Tools
> -required_libraries = Support
> +required_libraries = ProfileData Support
> diff --git a/tools/llvm-profdata/Makefile b/tools/llvm-profdata/Makefile
> index 9d7ad52..4177780 100644
> --- a/tools/llvm-profdata/Makefile
> +++ b/tools/llvm-profdata/Makefile
> @@ -9,7 +9,7 @@
>  
>  LEVEL := ../..
>  TOOLNAME := llvm-profdata
> -LINK_COMPONENTS := core support
> +LINK_COMPONENTS := core profiledata support
>  
>  # This tool has no plugins, optimize startup time.
>  TOOL_NO_EXPORTS := 1
> diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
> index d8abafc..832f3ae 100644
> --- a/tools/llvm-profdata/llvm-profdata.cpp
> +++ b/tools/llvm-profdata/llvm-profdata.cpp
> @@ -12,8 +12,8 @@
>  //===----------------------------------------------------------------------===//
>  
>  #include "llvm/ADT/StringRef.h"
> +#include "llvm/ProfileData/InstrProfReader.h"
>  #include "llvm/Support/CommandLine.h"
> -#include "llvm/Support/LineIterator.h"
>  #include "llvm/Support/ManagedStatic.h"
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/PrettyStackTrace.h"
> @@ -22,12 +22,11 @@
>  
>  using namespace llvm;
>  
> -static void exitWithError(const std::string &Message,
> -                          const std::string &Filename, int64_t Line = -1) {
> -  errs() << "error: " << Filename;
> -  if (Line >= 0)
> -    errs() << ":" << Line;
> -  errs() << ": " << Message << "\n";
> +static void exitWithError(const Twine &Message, StringRef Whence = "") {
> +  errs() << "error: ";
> +  if (!Whence.empty())
> +    errs() << Whence << ": ";
> +  errs() << Message << "\n";
>    ::exit(1);
>  }
>  
> @@ -46,11 +45,10 @@ int merge_main(int argc, const char *argv[]) {
>  
>    cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
>  
> -  std::unique_ptr<MemoryBuffer> File1;
> -  std::unique_ptr<MemoryBuffer> File2;
> -  if (error_code ec = MemoryBuffer::getFile(Filename1, File1))
> +  std::unique_ptr<InstrProfReader> Reader1, Reader2;
> +  if (error_code ec = InstrProfReader::create(Filename1, Reader1))
>      exitWithError(ec.message(), Filename1);
> -  if (error_code ec = MemoryBuffer::getFile(Filename2, File2))
> +  if (error_code ec = InstrProfReader::create(Filename2, Reader2))
>      exitWithError(ec.message(), Filename2);
>  
>    if (OutputFilename.empty())
> @@ -61,64 +59,32 @@ int merge_main(int argc, const char *argv[]) {
>    if (!ErrorInfo.empty())
>      exitWithError(ErrorInfo, OutputFilename);
>  
> -  enum {ReadName, ReadHash, ReadCount, ReadCounters} State = ReadName;
> -  uint64_t N1, N2, NumCounters;
> -  line_iterator I1(*File1, '#'), I2(*File2, '#');
> -  for (; !I1.is_at_end() && !I2.is_at_end(); ++I1, ++I2) {
> -    if (I1->empty()) {
> -      if (!I2->empty())
> -        exitWithError("data mismatch", Filename2, I2.line_number());
> -      Output << "\n";
> -      continue;
> -    }
> -    switch (State) {
> -    case ReadName:
> -      if (*I1 != *I2)
> -        exitWithError("function name mismatch", Filename2, I2.line_number());
> -      Output << *I1 << "\n";
> -      State = ReadHash;
> -      break;
> -    case ReadHash:
> -      if (I1->getAsInteger(10, N1))
> -        exitWithError("bad function hash", Filename1, I1.line_number());
> -      if (I2->getAsInteger(10, N2))
> -        exitWithError("bad function hash", Filename2, I2.line_number());
> -      if (N1 != N2)
> -        exitWithError("function hash mismatch", Filename2, I2.line_number());
> -      Output << N1 << "\n";
> -      State = ReadCount;
> -      break;
> -    case ReadCount:
> -      if (I1->getAsInteger(10, N1))
> -        exitWithError("bad function count", Filename1, I1.line_number());
> -      if (I2->getAsInteger(10, N2))
> -        exitWithError("bad function count", Filename2, I2.line_number());
> -      if (N1 != N2)
> -        exitWithError("function count mismatch", Filename2, I2.line_number());
> -      Output << N1 << "\n";
> -      NumCounters = N1;
> -      State = ReadCounters;
> -      break;
> -    case ReadCounters:
> -      if (I1->getAsInteger(10, N1))
> -        exitWithError("invalid counter", Filename1, I1.line_number());
> -      if (I2->getAsInteger(10, N2))
> -        exitWithError("invalid counter", Filename2, I2.line_number());
> -      uint64_t Sum = N1 + N2;
> -      if (Sum < N1)
> -        exitWithError("counter overflow", Filename2, I2.line_number());
> -      Output << N1 + N2 << "\n";
> -      if (--NumCounters == 0)
> -        State = ReadName;
> -      break;
> +  for (InstrProfIterator I1 = Reader1->begin(), E1 = Reader1->end(),
> +                         I2 = Reader2->begin(), E2 = Reader2->end();
> +       I1 != E1 && I2 != E2; ++I1, ++I2) {
> +    if (I1->Name != I2->Name)
> +      exitWithError("Function name mismatch, " + I1->Name + " != " + I2->Name);
> +    if (I1->Hash != I2->Hash)
> +      exitWithError("Function hash mismatch for " + I1->Name);
> +    if (I1->Counts.size() != I2->Counts.size())
> +      exitWithError("Function count mismatch for " + I1->Name);
> +
> +    Output << I1->Name << "\n" << I1->Hash << "\n" << I1->Counts.size() << "\n";
> +
> +    for (size_t II = 0, EE = I1->Counts.size(); II < EE; ++II) {

Ranged-based for here?

> +      uint64_t Sum = I1->Counts[II] + I2->Counts[II];
> +      if (Sum < I1->Counts[II])
> +        exitWithError("Counter overflow for " + I1->Name);
> +      Output << Sum << "\n";
>      }
> +    Output << "\n";
>    }
> -  if (!I1.is_at_end())
> -    exitWithError("truncated file", Filename1, I1.line_number());
> -  if (!I2.is_at_end())
> -    exitWithError("truncated file", Filename2, I2.line_number());
> -  if (State != ReadName)
> -    exitWithError("truncated file", Filename1, I1.line_number());
> +  if (Reader1->hasError())
> +    exitWithError(Reader1->getError().message(), Filename1);
> +  if (Reader2->hasError())
> +    exitWithError(Reader2->getError().message(), Filename1);

Should this be Filename2?

> +  if (!Reader1->isEOF() || !Reader2->isEOF())
> +    exitWithError("Number of instrumented functions differ.”);

If we’re assuming for now that the first file is canonical, I think
you should give Filename2 as the source of the error.

>  
>    return 0;
>  }
>