[PATCH] Introduce the InstrProfReader interface and a text reader
Duncan P. N. Exon Smith
dexonsmith at apple.com
Wed Mar 19 16:06:35 PDT 2014
On Mar 19, 2014, at 3:52 PM, Justin Bogner <mail at justinbogner.com> wrote:
> This patch introduces the ProfileData library without going into details
> about the data format that instrumentation based profiling should use,
> instead just sticking to the naive text format we have now.
>
> This is designed to make readers for multiple formats simple, so that a
> "raw" format for compiler-rt to commit and an efficient format for clang
> to consume is easy.
>
> Eric, would you mind checking the build system changes? It builds with
> both clang and autoconf for me, but I'd appreciate a look over by
> someone who has a clue how our build works.
>
> <TextInstrProfReader.patch>
LGTM, with a couple of fixups.
But I don’t understand our build system either, so you should wait for
someone to check that over.
> diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
> new file mode 100644
> index 0000000..fb09c75
> --- /dev/null
> +++ b/include/llvm/ProfileData/InstrProfReader.h
> @@ -0,0 +1,118 @@
> +//=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for reading profiling data for instrumentation
> +// based PGO and coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_PROFILEDATA_INSTRPROF_READER_H__
> +#define LLVM_PROFILEDATA_INSTRPROF_READER_H__
> +
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/LineIterator.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +
> +#include <iterator>
> +
> +namespace llvm {
> +
> +class InstrProfReader;
> +
> +/// Profiling information for a single function.
> +struct InstrProfRecord {
> + StringRef Name;
> + uint64_t Hash;
> + ArrayRef<uint64_t> Counts;
> +};
> +
> +/// A file format agnostic iterator over profiling data.
> +class InstrProfIterator : public std::iterator<std::input_iterator_tag,
> + InstrProfRecord> {
> + InstrProfReader *Reader;
> + InstrProfRecord Record;
> +
> + void Increment();
> +public:
> + InstrProfIterator() : Reader(nullptr) {}
> + InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
> +
> + InstrProfIterator &operator++() { Increment(); return *this; }
> + bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
> + bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
> + InstrProfRecord &operator*() { return Record; }
> + InstrProfRecord *operator->() { return &Record; }
> +};
> +
> +/// Base class and interface for reading profiling data of any known instrprof
> +/// format. Provides an iterator over InstrProfRecords.
> +class InstrProfReader {
> + error_code LastError;
> +public:
> + InstrProfReader() : LastError(instrprof_error::success) {}
> + virtual ~InstrProfReader() {}
> +
> + /// Read a single record.
> + virtual error_code readNextRecord(InstrProfRecord &Record) = 0;
> + /// Iterator over profile data.
> + virtual InstrProfIterator begin() { return InstrProfIterator(this); }
> + virtual InstrProfIterator end() { return InstrProfIterator(); }
Do these need to be virtual?
> +
> + /// Set the current error_code and return same.
> + error_code error(error_code EC) {
> + LastError = EC;
> + return EC;
> + }
> + /// Clear the current error code and return a successful one.
> + error_code success() { return error(instrprof_error::success); }
> +
> + /// Return true if the reader has finished reading the profile data.
> + bool isEOF() { return LastError == instrprof_error::eof; }
> + /// Return true if the reader encountered an error reading profiling data.
> + bool hasError() { return LastError && !isEOF(); }
> + /// Get the current error code.
> + error_code getError() { return LastError; }
> +
> + /// Factory method to create an appropriately typed reader for the given
> + /// instrprof file.
> + static error_code create(std::string Path,
> + std::unique_ptr<InstrProfReader> &Result);
> +};
> +
> +/// Reader for the simple text based instrprof format.
> +///
> +/// This format is a simple text format that's suitable for test data. Records
> +/// are separated by one or more blank lines, and record fields are separated by
> +/// new lines.
> +///
> +/// Each record consists of a function name, a function hash, a number of
> +/// counters, and then each counter value, in that order.
> +class TextInstrProfReader : public InstrProfReader {
> +private:
> + /// The profile data file contents.
> + std::unique_ptr<MemoryBuffer> DataBuffer;
> + /// Iterator over the profile data.
> + line_iterator Line;
> + /// The current set of counter values.
> + std::vector<uint64_t> Counts;
> +
> + TextInstrProfReader(const TextInstrProfReader &) LLVM_DELETED_FUNCTION;
> + TextInstrProfReader &operator=(const TextInstrProfReader &)
> + LLVM_DELETED_FUNCTION;
> +public:
> + TextInstrProfReader(std::unique_ptr<MemoryBuffer> &DataBuffer_)
> + : DataBuffer(DataBuffer_.release()), Line(*DataBuffer, '#') {}
> +
> + error_code readNextRecord(InstrProfRecord &Record) override;
> +};
> +
> +} // end namespace llvm
> +
> diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
> new file mode 100644
> index 0000000..329abf8
> --- /dev/null
> +++ b/lib/ProfileData/InstrProf.cpp
> @@ -0,0 +1,56 @@
> +//=-- InstrProf.cpp - Instrumented profiling format support -----------------=//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for clang's instrumentation based PGO and
> +// coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/ErrorHandling.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +class InstrProfErrorCategoryType : public error_category {
> + const char *name() const override { return "llvm.instrprof"; }
> + std::string message(int IE) const override {
> + instrprof_error::ErrorType E = static_cast<instrprof_error::ErrorType>(IE);
> + switch (E) {
> + case instrprof_error::success:
> + return "Success";
> + case instrprof_error::eof:
> + return "End of File";
> + case instrprof_error::bad_magic:
> + return "Invalid file format (bad magic)";
> + case instrprof_error::unsupported_version:
> + return "Unsupported format version";
> + case instrprof_error::too_large:
> + return "Too much profile data";
> + case instrprof_error::truncated:
> + return "Truncated profile data";
> + case instrprof_error::malformed:
> + return "Malformed profile data";
> + case instrprof_error::unknown_function:
> + return "No profile data available for function";
> + }
> + llvm_unreachable("A value of instrprof_error has no message.");
> + }
> + error_condition default_error_condition(int EV) const {
> + if (EV == instrprof_error::success)
> + return errc::success;
> + return errc::invalid_argument;
> + }
> +};
> +}
> +
> +const error_category &llvm::instrprof_category() {
> + static InstrProfErrorCategoryType C;
> + return C;
> +}
> diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
> new file mode 100644
> index 0000000..8b36000
> --- /dev/null
> +++ b/lib/ProfileData/InstrProfReader.cpp
> @@ -0,0 +1,84 @@
> +//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains support for reading profiling data for clang's
> +// instrumentation based PGO and coverage.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ProfileData/InstrProfReader.h"
> +#include "llvm/ProfileData/InstrProf.h"
> +#include "llvm/Support/Endian.h"
> +
> +#include <cassert>
> +
> +using namespace llvm;
> +
> +error_code InstrProfReader::create(std::string Path,
> + std::unique_ptr<InstrProfReader> &Result) {
> + std::unique_ptr<MemoryBuffer> Buffer;
> + if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
> + return EC;
> +
> + // Sanity check the file.
> + if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
> + return instrprof_error::too_large;
> +
> + // FIXME: This needs to determine which format the file is and construct the
> + // correct subclass.
> + Result.reset(new TextInstrProfReader(Buffer));
> +
> + return instrprof_error::success;
> +}
> +
> +void InstrProfIterator::Increment() {
> + if (Reader->readNextRecord(Record))
> + *this = InstrProfIterator();
> +}
> +
> +error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
> + // Skip empty lines.
> + while (!Line.is_at_end() && Line->empty())
> + ++Line;
> + // If we hit EOF while looking for a name, we're done.
> + if (Line.is_at_end())
> + return error(instrprof_error::eof);
> +
> + // Read the function name.
> + Record.Name = *Line++;
> +
> + // Read the function hash.
> + if (Line.is_at_end())
> + return error(instrprof_error::truncated);
> + if ((Line++)->getAsInteger(10, Record.Hash))
> + return error(instrprof_error::malformed);
> +
> + // Read the number of counters.
> + uint64_t NumCounters;
> + if (Line.is_at_end())
> + return error(instrprof_error::truncated);
> + if ((Line++)->getAsInteger(10, NumCounters))
> + return error(instrprof_error::malformed);
> +
> + // Read each counter and fill our internal storage with the values.
> + Counts.clear();
> + Counts.reserve(NumCounters);
> + for (uint64_t I = 0; I < NumCounters; ++I) {
> + if (Line.is_at_end())
> + return error(instrprof_error::truncated);
> + uint64_t Count;
> + if ((Line++)->getAsInteger(10, Count))
> + return error(instrprof_error::malformed);
> + Counts.push_back(Count);
> + }
> + // Give the record a reference to our internal counter storage.
> + Record.Counts = Counts;
> +
> + return success();
> +}
> diff --git a/test/tools/llvm-profdata/errors.test b/test/tools/llvm-profdata/errors.test
> index 219d88d..afac7a4 100644
> --- a/test/tools/llvm-profdata/errors.test
> +++ b/test/tools/llvm-profdata/errors.test
> @@ -1,19 +1,19 @@
> RUN: not llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
> RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
> RUN: not llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/empty.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
> -LENGTH: error: {{.*}}: truncated file
> +LENGTH: error: Number of instrumented functions differ
>
> RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=NAME
> -NAME: error: {{.*}}: function name mismatch
> +NAME: error: Function name mismatch, foo != bar
>
> RUN: not llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata 2>&1 | FileCheck %s --check-prefix=HASH
> -HASH: error: {{.*}}: function hash mismatch
> +HASH: error: Function hash mismatch for foo
>
> RUN: not llvm-profdata merge %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata 2>&1 | FileCheck %s --check-prefix=OVERFLOW
> -OVERFLOW: error: {{.*}}: counter overflow
> +OVERFLOW: error: Counter overflow for overflow
>
> RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
> -INVALID-COUNT-LATER: error: {{.*}}: invalid counter
> +INVALID-COUNT-LATER: error: {{.*}}: Malformed profile data
>
> RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
> -BAD-HASH: error: {{.*}}: bad function hash
> +BAD-HASH: error: {{.*}}: Malformed profile data
> diff --git a/tools/llvm-profdata/CMakeLists.txt b/tools/llvm-profdata/CMakeLists.txt
> index 4b1357d..4c64cfa 100644
> --- a/tools/llvm-profdata/CMakeLists.txt
> +++ b/tools/llvm-profdata/CMakeLists.txt
> @@ -1,4 +1,4 @@
> -set(LLVM_LINK_COMPONENTS core support )
> +set(LLVM_LINK_COMPONENTS core profiledata support )
>
> add_llvm_tool(llvm-profdata
> llvm-profdata.cpp
> diff --git a/tools/llvm-profdata/LLVMBuild.txt b/tools/llvm-profdata/LLVMBuild.txt
> index fc9e469..bbc1ea6 100644
> --- a/tools/llvm-profdata/LLVMBuild.txt
> +++ b/tools/llvm-profdata/LLVMBuild.txt
> @@ -19,4 +19,4 @@
> type = Tool
> name = llvm-profdata
> parent = Tools
> -required_libraries = Support
> +required_libraries = ProfileData Support
> diff --git a/tools/llvm-profdata/Makefile b/tools/llvm-profdata/Makefile
> index 9d7ad52..4177780 100644
> --- a/tools/llvm-profdata/Makefile
> +++ b/tools/llvm-profdata/Makefile
> @@ -9,7 +9,7 @@
>
> LEVEL := ../..
> TOOLNAME := llvm-profdata
> -LINK_COMPONENTS := core support
> +LINK_COMPONENTS := core profiledata support
>
> # This tool has no plugins, optimize startup time.
> TOOL_NO_EXPORTS := 1
> diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
> index d8abafc..832f3ae 100644
> --- a/tools/llvm-profdata/llvm-profdata.cpp
> +++ b/tools/llvm-profdata/llvm-profdata.cpp
> @@ -12,8 +12,8 @@
> //===----------------------------------------------------------------------===//
>
> #include "llvm/ADT/StringRef.h"
> +#include "llvm/ProfileData/InstrProfReader.h"
> #include "llvm/Support/CommandLine.h"
> -#include "llvm/Support/LineIterator.h"
> #include "llvm/Support/ManagedStatic.h"
> #include "llvm/Support/MemoryBuffer.h"
> #include "llvm/Support/PrettyStackTrace.h"
> @@ -22,12 +22,11 @@
>
> using namespace llvm;
>
> -static void exitWithError(const std::string &Message,
> - const std::string &Filename, int64_t Line = -1) {
> - errs() << "error: " << Filename;
> - if (Line >= 0)
> - errs() << ":" << Line;
> - errs() << ": " << Message << "\n";
> +static void exitWithError(const Twine &Message, StringRef Whence = "") {
> + errs() << "error: ";
> + if (!Whence.empty())
> + errs() << Whence << ": ";
> + errs() << Message << "\n";
> ::exit(1);
> }
>
> @@ -46,11 +45,10 @@ int merge_main(int argc, const char *argv[]) {
>
> cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
>
> - std::unique_ptr<MemoryBuffer> File1;
> - std::unique_ptr<MemoryBuffer> File2;
> - if (error_code ec = MemoryBuffer::getFile(Filename1, File1))
> + std::unique_ptr<InstrProfReader> Reader1, Reader2;
> + if (error_code ec = InstrProfReader::create(Filename1, Reader1))
> exitWithError(ec.message(), Filename1);
> - if (error_code ec = MemoryBuffer::getFile(Filename2, File2))
> + if (error_code ec = InstrProfReader::create(Filename2, Reader2))
> exitWithError(ec.message(), Filename2);
>
> if (OutputFilename.empty())
> @@ -61,64 +59,32 @@ int merge_main(int argc, const char *argv[]) {
> if (!ErrorInfo.empty())
> exitWithError(ErrorInfo, OutputFilename);
>
> - enum {ReadName, ReadHash, ReadCount, ReadCounters} State = ReadName;
> - uint64_t N1, N2, NumCounters;
> - line_iterator I1(*File1, '#'), I2(*File2, '#');
> - for (; !I1.is_at_end() && !I2.is_at_end(); ++I1, ++I2) {
> - if (I1->empty()) {
> - if (!I2->empty())
> - exitWithError("data mismatch", Filename2, I2.line_number());
> - Output << "\n";
> - continue;
> - }
> - switch (State) {
> - case ReadName:
> - if (*I1 != *I2)
> - exitWithError("function name mismatch", Filename2, I2.line_number());
> - Output << *I1 << "\n";
> - State = ReadHash;
> - break;
> - case ReadHash:
> - if (I1->getAsInteger(10, N1))
> - exitWithError("bad function hash", Filename1, I1.line_number());
> - if (I2->getAsInteger(10, N2))
> - exitWithError("bad function hash", Filename2, I2.line_number());
> - if (N1 != N2)
> - exitWithError("function hash mismatch", Filename2, I2.line_number());
> - Output << N1 << "\n";
> - State = ReadCount;
> - break;
> - case ReadCount:
> - if (I1->getAsInteger(10, N1))
> - exitWithError("bad function count", Filename1, I1.line_number());
> - if (I2->getAsInteger(10, N2))
> - exitWithError("bad function count", Filename2, I2.line_number());
> - if (N1 != N2)
> - exitWithError("function count mismatch", Filename2, I2.line_number());
> - Output << N1 << "\n";
> - NumCounters = N1;
> - State = ReadCounters;
> - break;
> - case ReadCounters:
> - if (I1->getAsInteger(10, N1))
> - exitWithError("invalid counter", Filename1, I1.line_number());
> - if (I2->getAsInteger(10, N2))
> - exitWithError("invalid counter", Filename2, I2.line_number());
> - uint64_t Sum = N1 + N2;
> - if (Sum < N1)
> - exitWithError("counter overflow", Filename2, I2.line_number());
> - Output << N1 + N2 << "\n";
> - if (--NumCounters == 0)
> - State = ReadName;
> - break;
> + for (InstrProfIterator I1 = Reader1->begin(), E1 = Reader1->end(),
> + I2 = Reader2->begin(), E2 = Reader2->end();
> + I1 != E1 && I2 != E2; ++I1, ++I2) {
> + if (I1->Name != I2->Name)
> + exitWithError("Function name mismatch, " + I1->Name + " != " + I2->Name);
> + if (I1->Hash != I2->Hash)
> + exitWithError("Function hash mismatch for " + I1->Name);
> + if (I1->Counts.size() != I2->Counts.size())
> + exitWithError("Function count mismatch for " + I1->Name);
> +
> + Output << I1->Name << "\n" << I1->Hash << "\n" << I1->Counts.size() << "\n";
> +
> + for (size_t II = 0, EE = I1->Counts.size(); II < EE; ++II) {
Ranged-based for here?
> + uint64_t Sum = I1->Counts[II] + I2->Counts[II];
> + if (Sum < I1->Counts[II])
> + exitWithError("Counter overflow for " + I1->Name);
> + Output << Sum << "\n";
> }
> + Output << "\n";
> }
> - if (!I1.is_at_end())
> - exitWithError("truncated file", Filename1, I1.line_number());
> - if (!I2.is_at_end())
> - exitWithError("truncated file", Filename2, I2.line_number());
> - if (State != ReadName)
> - exitWithError("truncated file", Filename1, I1.line_number());
> + if (Reader1->hasError())
> + exitWithError(Reader1->getError().message(), Filename1);
> + if (Reader2->hasError())
> + exitWithError(Reader2->getError().message(), Filename1);
Should this be Filename2?
> + if (!Reader1->isEOF() || !Reader2->isEOF())
> + exitWithError("Number of instrumented functions differ.”);
If we’re assuming for now that the first file is canonical, I think
you should give Filename2 as the source of the error.
>
> return 0;
> }
>
More information about the llvm-commits
mailing list