[llvm] 9bd69ae - [nfc][mlgo] Remove abstraction layers for training logger
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 17 16:27:49 PST 2023
Author: Mircea Trofin
Date: 2023-01-17T16:19:38-08:00
New Revision: 9bd69ae8f77e6339733ab8ef7783b5c73cffdb98
URL: https://github.com/llvm/llvm-project/commit/9bd69ae8f77e6339733ab8ef7783b5c73cffdb98
DIFF: https://github.com/llvm/llvm-project/commit/9bd69ae8f77e6339733ab8ef7783b5c73cffdb98.diff
LOG: [nfc][mlgo] Remove abstraction layers for training logger
This follows from D141720
Differential Revision: https://reviews.llvm.org/D141967
Added:
Modified:
llvm/include/llvm/Analysis/Utils/TrainingLogger.h
llvm/lib/Analysis/TrainingLogger.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/Utils/TrainingLogger.h b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
index 34f8a3f9fd6f9..7ee25bb44df51 100644
--- a/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
+++ b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
@@ -6,6 +6,50 @@
//
//===----------------------------------------------------------------------===//
//
+// The design goals of the logger are:
+// - no dependencies that llvm doesn't already have.
+// - support streaming, so that we don't need to buffer data during compilation.
+// - 0-decoding of tensor values. Tensor values are potentially very large
+// buffers of scalars; because of their potentially large size, avoiding
+// serialization/deserialization overhead is preferred.
+//
+// The simple logger produces an output of the form (each line item on its own
+// line):
+// - header: a json object describing the data that will follow.
+// - context: e.g. the function name for regalloc, or "default" for module-wide
+// optimizations like the inliner. This is the context to which the subsequent
+// data corresponds.
+// - observation number.
+// - tensor values - raw bytes of the tensors, in the order given in the header.
+// The values are concatenated, with no separator between successive tensor
+// values. At the end, there is a newline character.
+// - [score] - optional; present only if a score spec was present in the header.
+// Currently, for final rewards, we output "0" scores after each observation,
+// except for the last one.
+// <repeat>
+// The file should be read as binary, but the reason we use newlines is mostly
+// ease of debugging: the log can be opened in a text editor and, while tensor
+// values are inscrutable, at least the sequence of data can be easily observed.
+// Of course, the buffer of tensor values could contain '\n' bytes. A reader
+// should use the header information to know how much data to read for the
+// tensor values, and not use line information for that.
+//
+// An example reader, used for testing, is available at
+// Analysis/models/log_reader.py
+//
+// Example:
+// {"features":[list of TensorSpecs], "score":<a tensor spec>}
+// {"context": "aFunction"}
+// {"observation": 0}
+// <bytes>
+// {"outcome": 0}
+// <bytes for the tensor corresponding to the "score" spec in the header>
+// {"observation": 1}
+// ...
+// {"context": "anotherFunction"}
+// {"observation": 0}
+// ...
+//
+
#ifndef LLVM_ANALYSIS_UTILS_TRAININGLOGGER_H
#define LLVM_ANALYSIS_UTILS_TRAININGLOGGER_H
@@ -43,8 +87,21 @@ namespace llvm {
/// At the end, call print to generate the log.
/// Alternatively, don't call logReward at the end of each event, just
/// log{Float|Int32|Int64}FinalReward at the end.
-class LoggerDataImpl;
class Logger final {
+ const std::vector<TensorSpec> FeatureSpecs;
+ const TensorSpec RewardSpec;
+ const bool IncludeReward;
+ std::vector<std::unique_ptr<char[]>> FeatureStorage;
+ std::vector<std::unique_ptr<char[]>> RewardStorage;
+ raw_ostream &dumpHeader(raw_ostream &OS) const;
+ raw_ostream &startContext(raw_ostream &OS, StringRef Name) const;
+ raw_ostream &startObservation(raw_ostream &OS, size_t Nr) const;
+ raw_ostream &writeOutcome(raw_ostream &OS, size_t CurrentObservationID) const;
+ char *addNewTensor(size_t FeatureID);
+ size_t getNrRecords() const;
+
+ void logRewardImpl(const char *Value, size_t Size);
+
public:
/// Construct a Logger. If IncludeReward is false, then logReward or
/// logFinalReward shouldn't be called, and the reward feature won't be
@@ -53,10 +110,13 @@ class Logger final {
/// corresponding indices) with any MLModelRunner implementations
/// corresponding to the model being trained/logged.
Logger(const std::vector<TensorSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward);
-
- ~Logger();
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
+ IncludeReward(IncludeReward) {}
+ template <typename T> void logReward(T Value) {
+ logRewardImpl(reinterpret_cast<const char *>(&Value), sizeof(T));
+ }
void logFloatReward(float Value);
void logInt32Reward(int32_t Value);
void logInt64Reward(int64_t Value);
@@ -80,19 +140,13 @@ class Logger final {
// Flush the content of the log to the stream, clearing the stored data in the
// process.
- void flush(std::string *Str);
- void flush(raw_ostream &OS);
+ raw_ostream &flush(raw_ostream &OS, bool WithHeader = true,
+ StringRef Context = "default") const;
// Flush a set of logs that are produced from the same module, e.g.
// per-function regalloc traces.
static void flushLogs(raw_ostream &OS,
const StringMap<std::unique_ptr<Logger>> &Loggers);
-
-private:
- std::vector<TensorSpec> FeatureSpecs;
- TensorSpec RewardSpec;
- const bool IncludeReward;
- std::unique_ptr<LoggerDataImpl> LoggerData;
};
} // namespace llvm
diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp
index 0a893218d2fa8..72ec14ae7c9bd 100644
--- a/llvm/lib/Analysis/TrainingLogger.cpp
+++ b/llvm/lib/Analysis/TrainingLogger.cpp
@@ -32,188 +32,95 @@ static cl::opt<bool>
UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
cl::desc("Output simple (non-protobuf) log."));
-namespace llvm {
-
-class LoggerDataImpl {
-protected:
- const std::vector<TensorSpec> LoggedFeatureSpecs;
- const TensorSpec RewardSpec;
- const bool IncludeReward;
- LoggerDataImpl(const std::vector<TensorSpec> &LoggedSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
- IncludeReward(IncludeReward) {}
- virtual void logRewardImpl(const char *Value, size_t Size) = 0;
-
-public:
- // flush the logged info to a stream and clear the log contents.
- virtual void flush(std::string *Str) = 0;
- virtual char *addNewTensor(size_t FeatureID) = 0;
- virtual size_t getNrRecords() const = 0;
- virtual ~LoggerDataImpl() = default;
+raw_ostream &Logger::dumpHeader(raw_ostream &OS) const {
+ json::OStream JOS(OS);
+ JOS.object([&]() {
+ JOS.attributeArray("features", [&]() {
+ for (const auto &TS : FeatureSpecs)
+ TS.toJSON(JOS);
+ });
+ if (IncludeReward) {
+ JOS.attributeBegin("score");
+ RewardSpec.toJSON(JOS);
+ JOS.attributeEnd();
+ }
+ });
+ OS << "\n";
+ return OS;
+}
- template <typename T> void logReward(T Value) {
- logRewardImpl(reinterpret_cast<const char *>(&Value), sizeof(T));
- }
-};
+raw_ostream &Logger::startContext(raw_ostream &OS, StringRef Name) const {
+ json::OStream JOS(OS);
+ JOS.object([&]() { JOS.attribute("context", Name); });
+ OS << "\n";
+ return OS;
+}
-// The design goals of the simple logger are:
-// - no dependencies that llvm doesn't already have.
-// - support streaming, so that we don't need to buffer data during compilation
-// - 0-decoding tensor values. Tensor values are potentially very large buffers
-// of scalars. Because of their potentially large size, avoiding
-// serialization/deserialization overhead is preferred.
-//
-// The simple logger produces an output of the form (each line item on its line)
-// - header: a json object describing the data that will follow.
-// - context: e.g. function name, for regalloc, or "default" for module-wide
-// optimizations like the inliner. This is the context to which the subsequent
-// data corresponds.
-// - observation number.
-// - tensor values - raw bytes of the tensors, in the order given in the header.
-// The values are in succession, i.e. no separator is found between successive
-// tensor values. At the end, there is a new line character.
-// - [score] - this is optional, and is present if it was present in the header.
-// Currently, for final rewards, we output "0" scores after each observation,
-// except for the last one.
-// <repeat>
-// The file should be read as binary, but the reason we use newlines is mostly
-// ease of debugging: the log can be opened in a text editor and, while tensor
-// values are inscrutable, at least the sequence of data can be easily observed.
-// Of course, the buffer of tensor values could contain '\n' bytes. A reader
-// should use the header information to know how much data to read for the
-// tensor values, and not use line information for that.
-//
-// An example reader, used for test, is available at
-// Analysis/models/log_reader.py
-//
-// Example:
-// {"features":[list of TensorSpecs], "score":<a tensor spec>}
-// {"context": "aFunction"}
-// {"observation": 0}
-// <bytes>
-// {"outcome": 0}
-// <bytes for the tensor corresponding to the "score" spec in the header>
-// {"observation": 1}
-// ...
-// {"context": "anotherFunction"}
-// {"observation": 0}
-// ...
-//
-class SimpleLoggerDataImpl : public LoggerDataImpl {
- std::vector<std::unique_ptr<char[]>> FeatureStorage;
- std::vector<std::unique_ptr<char[]>> RewardStorage;
+raw_ostream &Logger::startObservation(raw_ostream &OS, size_t Nr) const {
+ json::OStream JOS(OS);
+ JOS.object([&]() { JOS.attribute("observation", static_cast<int64_t>(Nr)); });
+ OS << "\n";
+ return OS;
+}
- raw_ostream &dumpHeader(raw_ostream &OS) const {
+raw_ostream &Logger::writeOutcome(raw_ostream &OS,
+ size_t CurrentObservationID) const {
+ if (IncludeReward) {
+ OS << "\n";
json::OStream JOS(OS);
JOS.object([&]() {
- JOS.attributeArray("features", [&]() {
- for (const auto &TS : LoggedFeatureSpecs)
- TS.toJSON(JOS);
- });
- if (IncludeReward) {
- JOS.attributeBegin("score");
- RewardSpec.toJSON(JOS);
- JOS.attributeEnd();
- }
+ JOS.attribute("outcome", static_cast<int64_t>(CurrentObservationID));
});
OS << "\n";
- return OS;
- }
-
- raw_ostream &startContext(raw_ostream &OS, StringRef Name) const {
- json::OStream JOS(OS);
- JOS.object([&]() { JOS.attribute("context", Name); });
- OS << "\n";
- return OS;
- }
-
- raw_ostream &startObservation(raw_ostream &OS, size_t Nr) const {
- json::OStream JOS(OS);
- JOS.object(
- [&]() { JOS.attribute("observation", static_cast<int64_t>(Nr)); });
- OS << "\n";
- return OS;
- }
-
- raw_ostream &writeOutcome(raw_ostream &OS,
- size_t CurrentObservationID) const {
- if (IncludeReward) {
- OS << "\n";
- json::OStream JOS(OS);
- JOS.object([&]() {
- JOS.attribute("outcome", static_cast<int64_t>(CurrentObservationID));
- });
- OS << "\n";
- OS.write(RewardStorage[CurrentObservationID].get(),
- RewardSpec.getTotalTensorBufferSize());
- }
- OS << "\n";
- return OS;
- }
- void flush(std::string *Str) override {
- llvm_unreachable("Use the ostream implementation");
- }
-
- char *addNewTensor(size_t FeatureID) override {
- return FeatureStorage
- .emplace_back(
- new char[LoggedFeatureSpecs[FeatureID].getTotalTensorBufferSize()])
- .get();
+ OS.write(RewardStorage[CurrentObservationID].get(),
+ RewardSpec.getTotalTensorBufferSize());
}
+ OS << "\n";
+ return OS;
+}
- size_t getNrRecords() const override {
- assert(FeatureStorage.size() % LoggedFeatureSpecs.size() == 0);
- return FeatureStorage.size() / LoggedFeatureSpecs.size();
- }
+char *Logger::addNewTensor(size_t FeatureID) {
+ return FeatureStorage
+ .emplace_back(
+ new char[FeatureSpecs[FeatureID].getTotalTensorBufferSize()])
+ .get();
+}
- void logRewardImpl(const char *Value, size_t Size) override {
- std::memcpy(RewardStorage.emplace_back(new char[Size]).get(), Value, Size);
- }
+size_t Logger::getNrRecords() const {
+ assert(FeatureStorage.size() % FeatureSpecs.size() == 0);
+ return FeatureStorage.size() / FeatureSpecs.size();
+}
-public:
- SimpleLoggerDataImpl(const std::vector<TensorSpec> &LoggedSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : LoggerDataImpl(LoggedSpecs, RewardSpec, IncludeReward) {}
+void Logger::logRewardImpl(const char *Value, size_t Size) {
+ std::memcpy(RewardStorage.emplace_back(new char[Size]).get(), Value, Size);
+}
- raw_ostream &flush(raw_ostream &OS, bool WithHeader = true,
- StringRef Context = "default") const {
- if (WithHeader)
- dumpHeader(OS);
- startContext(OS, Context);
- size_t CurrentObservationID = 0;
- for (size_t I = 0; I < FeatureStorage.size(); ++I) {
- size_t TensorID = I % LoggedFeatureSpecs.size();
- if (TensorID == 0) {
- CurrentObservationID = I / LoggedFeatureSpecs.size();
- startObservation(OS, CurrentObservationID);
- }
- OS.write(FeatureStorage[I].get(),
- LoggedFeatureSpecs[TensorID].getTotalTensorBufferSize());
- if (TensorID == LoggedFeatureSpecs.size() - 1) {
- writeOutcome(OS, CurrentObservationID);
- }
+raw_ostream &Logger::flush(raw_ostream &OS, bool WithHeader,
+ StringRef Context) const {
+ if (WithHeader)
+ dumpHeader(OS);
+ startContext(OS, Context);
+ size_t CurrentObservationID = 0;
+ for (size_t I = 0; I < FeatureStorage.size(); ++I) {
+ size_t TensorID = I % FeatureSpecs.size();
+ if (TensorID == 0) {
+ CurrentObservationID = I / FeatureSpecs.size();
+ startObservation(OS, CurrentObservationID);
+ }
+ OS.write(FeatureStorage[I].get(),
+ FeatureSpecs[TensorID].getTotalTensorBufferSize());
+ if (TensorID == FeatureSpecs.size() - 1) {
+ writeOutcome(OS, CurrentObservationID);
}
- return OS;
}
-};
-} // namespace llvm
-
-Logger::Logger(const std::vector<TensorSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
- IncludeReward(IncludeReward) {
- LoggerData = std::make_unique<SimpleLoggerDataImpl>(FeatureSpecs, RewardSpec,
- IncludeReward);
+ return OS;
}
-Logger::~Logger() {}
-
#define LOG_REWARD(NAME, TYPE) \
void Logger::log##NAME##Reward(TYPE Value) { \
assert(IncludeReward); \
(void)IncludeReward; \
- LoggerData->logReward(Value); \
+ logReward(Value); \
}
LOG_REWARD(Float, float)
@@ -224,7 +131,7 @@ LOG_REWARD(Int64, int64_t)
#define LOG_FINAL_REWARD(NAME, TYPE) \
void Logger::log##NAME##FinalReward(TYPE Value) { \
assert(RewardSpec.isElementType<TYPE>()); \
- for (size_t I = 1; I < LoggerData->getNrRecords(); ++I) \
+ for (size_t I = 1; I < getNrRecords(); ++I) \
log##NAME##Reward(0); \
log##NAME##Reward(Value); \
}
@@ -264,28 +171,14 @@ void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
}
char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
- return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
-}
-
-void Logger::flush(std::string *Str) { LoggerData->flush(Str); }
-
-void Logger::flush(raw_ostream &OS) {
- if (UseSimpleLogger) {
- reinterpret_cast<SimpleLoggerDataImpl *>(LoggerData.get())->flush(OS);
- } else {
- std::string Buff;
- LoggerData->flush(&Buff);
- OS << Buff;
- }
+ return reinterpret_cast<char *>(addNewTensor(FeatureID));
}
void Logger::flushLogs(raw_ostream &OS,
const StringMap<std::unique_ptr<Logger>> &Loggers) {
bool IsFirst = true;
for (const auto &NamedLogger : Loggers) {
- auto *Impl = NamedLogger.second->LoggerData.get();
- reinterpret_cast<const SimpleLoggerDataImpl *>(Impl)->flush(
- OS, IsFirst, NamedLogger.first());
+ NamedLogger.second->flush(OS, IsFirst, NamedLogger.first());
IsFirst = false;
}
}
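
For reference, a minimal usage sketch of the de-layered Logger (not part of
the commit; the feature name, shapes, and the standalone main() are
illustrative assumptions). It exercises only the API visible in the diff
above: the now-inline constructor, logSpecifiedTensorValue, logFloatReward,
and the stream-based flush:

#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <vector>

using namespace llvm;

int main() {
  // One int64 feature of shape {2}, plus a float score per observation.
  const std::vector<TensorSpec> Features{
      TensorSpec::createSpec<int64_t>("a_feature", {2})};
  const TensorSpec Reward = TensorSpec::createSpec<float>("score", {1});
  Logger L(Features, Reward, /*IncludeReward=*/true);

  for (int64_t I = 0; I < 2; ++I) {
    const int64_t Vals[2] = {I, I + 1};
    // Raw tensor bytes are copied into the logger's own storage.
    L.logSpecifiedTensorValue(/*FeatureID=*/0,
                              reinterpret_cast<const char *>(Vals));
    L.logFloatReward(static_cast<float>(I));
  }
  // Header, context line, observations and outcomes, all in one pass.
  L.flush(outs(), /*WithHeader=*/true, /*Context=*/"aFunction");
  return 0;
}

With IncludeReward=false, the logFloatReward calls (and the outcome records
in the output) would be omitted.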
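And a deliberately simplified reader sketch for the format documented in the
new header comment. The canonical, spec-driven reader is
Analysis/models/log_reader.py; this C++ version hardcodes the tensor sizes
(one int64 feature of two elements plus a float score, matching the sketch
above, and assuming same-endian producer and consumer) that a real reader
must instead derive from the header line:

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  std::ifstream In(argv[1], std::ios::binary);
  std::string Line;
  std::getline(In, Line); // header json; sizes below are assumed, not parsed
  const size_t FeatureBytes = 2 * sizeof(int64_t);
  while (std::getline(In, Line)) {
    if (Line.rfind("{\"observation\"", 0) == 0) {
      int64_t Vals[2];
      // Read exactly the advertised byte count: the payload may itself
      // contain '\n', so line-oriented reads must not be used here.
      In.read(reinterpret_cast<char *>(Vals), FeatureBytes);
      In.ignore(1); // the newline terminating the tensor bytes
      std::cout << "features: " << Vals[0] << " " << Vals[1] << "\n";
    } else if (Line.rfind("{\"outcome\"", 0) == 0) {
      float Score;
      In.read(reinterpret_cast<char *>(&Score), sizeof(Score));
      In.ignore(1); // the trailing newline after the score bytes
      std::cout << "score: " << Score << "\n";
    }
    // Any other line is a context marker ({"context": ...}); skipped here.
  }
  return 0;
}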