[llvm] 55e12f7 - [NFC][MLGO] Just use the underlying protobuf object for logging
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 23 10:57:00 PDT 2021
Author: Mircea Trofin
Date: 2021-07-23T10:56:48-07:00
New Revision: 55e12f7080df4f2adb43e6aa8151d59e981c724a
URL: https://github.com/llvm/llvm-project/commit/55e12f7080df4f2adb43e6aa8151d59e981c724a
DIFF: https://github.com/llvm/llvm-project/commit/55e12f7080df4f2adb43e6aa8151d59e981c724a.diff
LOG: [NFC][MLGO] Just use the underlying protobuf object for logging
When logging in 'development mode', avoid buffering the data only to copy
it into the protobuf afterwards. Instead, populate the underlying protobuf
directly.
Differential Revision: https://reviews.llvm.org/D106592
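
For context, a minimal sketch of the reworked Logger API, following the
event-logging pattern documented in the header diff below. The caller
function and feature setup are illustrative (they mirror the updated unit
tests in this patch); the Logger calls name symbols introduced by the patch:

#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/raw_ostream.h"

void logTwoEvents() {
  std::vector<llvm::LoggedFeatureSpec> Features;
  Features.push_back(
      {llvm::TensorSpec::createSpec<float>("the_float", {2, 3}), llvm::None});
  Features.push_back(
      {llvm::TensorSpec::createSpec<int64_t>("the_int", {2}), llvm::None});
  auto Reward = llvm::TensorSpec::createSpec<float>("reward", {1});
  llvm::Logger L(Features, Reward, /*IncludeReward=*/true);

  // Event 0: one typed log* call per feature, in spec order, then the reward.
  const float F0[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
  const int64_t I0[]{2, 3};
  L.logFloatValue(0, F0);
  L.logInt64Value(1, I0);
  L.logFloatReward(3.4);

  // The typed calls write straight into the tensorflow::SequenceExample held
  // by LoggerDataImpl; print() only serializes it, with no intermediate
  // raw buffer.
  L.print(llvm::outs());
}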
Added:
Modified:
llvm/include/llvm/Analysis/Utils/TFUtils.h
llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
llvm/lib/Analysis/TFUtils.cpp
llvm/unittests/Analysis/TFUtilsTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index ea6bc2cf19ee6..47ee23e060002 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -12,6 +12,7 @@
#include "llvm/Config/llvm-config.h"
#ifdef LLVM_HAVE_TF_API
+#include "llvm/ADT/StringMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/JSON.h"
@@ -120,56 +121,62 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
/// The assumption is that, for an event to be logged (i.e. a set of feature
/// values and a reward), the user calls the log* API for each feature exactly
/// once, providing the index matching the position in the feature spec list
-/// provided at construction:
+/// provided at construction. The example assumes the first feature's element
+/// type is float, the second is int64, and the reward is float:
+///
/// event 0:
-/// logTensorValue(0, ...)
-/// logTensorValue(1, ...)
+/// logFloatValue(0, ...)
+/// logInt64Value(1, ...)
/// ...
-/// logReward(...)
+/// logFloatReward(...)
/// event 1:
-/// logTensorValue(0, ...)
-/// logTensorValue(1, ...)
+/// logFloatValue(0, ...)
+/// logInt64Value(1, ...)
/// ...
-/// logReward(...)
+/// logFloatReward(...)
///
/// At the end, call print to generate the protobuf.
+/// Alternatively, don't call logReward at the end of each event, just
+/// log{Float|Int32|Int64}FinalReward at the end.
+class LoggerDataImpl;
class Logger final {
public:
- /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
- /// be called, and the reward feature won't be printed out.
+ /// Construct a Logger. If IncludeReward is false, then logReward or
+ /// logFinalReward shouldn't be called, and the reward feature won't be
+ /// printed out.
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
- RawLogData(FeatureSpecs.size() + IncludeReward),
- IncludeReward(IncludeReward) {}
-
- template <typename T> void logReward(T Value) {
- assert(IncludeReward);
- logTensorValue(RawLogData.size() - 1, &Value);
- }
+ const TensorSpec &RewardSpec, bool IncludeReward);
- template <typename T> void logFinalReward(T Value) {
- assert(RawLogData.back().empty());
- logReward(Value);
- }
+ ~Logger();
- template <typename T>
- void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
- const char *Start = reinterpret_cast<const char *>(Value);
- const char *End = Start + sizeof(T) * Size;
- RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
- }
+ void logFloatReward(float Value);
+ void logInt32Reward(int32_t Value);
+ void logInt64Reward(int64_t Value);
+
+ void logFloatFinalReward(float Value);
+ void logInt32FinalReward(int32_t Value);
+ void logInt64FinalReward(int64_t Value);
+
+ void logFloatValue(size_t FeatureID, const float *Value);
+ void logInt32Value(size_t FeatureID, const int32_t *Value);
+ void logInt64Value(size_t FeatureID, const int64_t *Value);
+
+ void logSpecifiedTensorValue(size_t FeatureID, const char *RawData);
+
+ // Warning! For int32_t, the return is set up for int64_t, so the caller needs
+ // to piecemeal cast their int32_t values.
+ // FIXME: let's drop int32_t support. While it's supported by evaluator, it's
+ // not supported by the tensorflow::SequenceExample proto. For small values,
+ // we can consider using bytes.
+ char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
void print(raw_ostream &OS);
private:
std::vector<LoggedFeatureSpec> FeatureSpecs;
TensorSpec RewardSpec;
- /// RawData has one entry per feature, plus one more for the reward.
- /// Each feature's values are then stored in a vector, in succession.
- /// This means the ith event is stored at [*][i]
- std::vector<std::vector<char>> RawLogData;
const bool IncludeReward;
+ std::unique_ptr<LoggerDataImpl> LoggerData;
};
class TFModelEvaluator final {
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 940eb01f760f3..ecfefa36918ce 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -353,24 +353,22 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
size_t CurrentFeature = 0;
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
int64_t F = ModelRunner.getFeature(CurrentFeature);
- L->logTensorValue(CurrentFeature, &F);
+ L->logInt64Value(CurrentFeature, &F);
}
for (size_t I = 1; I < OutputCount; ++I) {
const auto &Result = *MUTR->lastEvaluationResult();
- auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
const char *RawData =
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
- L->logTensorValue(CurrentFeature, RawData,
- Spec.getElementCount() * Spec.getElementByteSize());
+ L->logSpecifiedTensorValue(CurrentFeature, RawData);
++CurrentFeature;
}
assert(CurrentFeature == DefaultDecisionPos);
- L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
- L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
+ L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
+ L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
- L->logReward(Event.Reward);
+ L->logInt64Reward(Event.Reward);
// For debugging / later use
Effects.push_back(Event.Effect);
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index d09668528a693..e93dc303ae636 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -32,6 +32,9 @@
using namespace llvm;
+using google::protobuf::Message;
+using google::protobuf::TextFormat;
+
static cl::opt<bool>
ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
cl::desc("Output textual (human-readable) protobuf."));
@@ -70,55 +73,6 @@ TFStatusPtr createTFStatus() {
TFSessionOptionsPtr createTFSessionOptions() {
return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}
-
-/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
-/// The tensors are assumed to be stored contiguously, in row-major format,
-/// in the TensorData buffer. Each tensor has the shape given by Spec. The
-/// feature name in the output is either the provided LoggingName, if
-/// specified, otherwise it's the name of the tensor (as given by Spec).
-void writeRawTensorsAsFeatureLists(tensorflow::FeatureLists *FE,
- const LoggedFeatureSpec &LoggedSpec,
- const char *TensorData, size_t TensorCount,
- bool FinalReward = false) {
- const auto &Spec = LoggedSpec.Spec;
- // The 'Feature' protobuf only has 3 possible fields: float_list,
- // int64_list, or bytes_list, so we capture int32 values as int64. We don't
- // support any other types.
- tensorflow::FeatureList &FL = (*FE->mutable_feature_list())[(
- LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())];
-
- const char *CurrentTensor = TensorData;
- const size_t TensorByteSize =
- Spec.getElementCount() * Spec.getElementByteSize();
- const size_t ElemCount = Spec.getElementCount();
- for (size_t E = 0; E < TensorCount; ++E) {
- const bool ShouldWrite = E + 1 == TensorCount || !FinalReward;
-
- if (Spec.isElementType<int64_t>()) {
- auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
- MF->Resize(ElemCount, 0);
- if (ShouldWrite)
- memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
- } else if (Spec.isElementType<int32_t>()) {
- auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
- MF->Resize(ElemCount, 0);
- if (ShouldWrite) {
- const int32_t *TD = reinterpret_cast<const int32_t *>(CurrentTensor);
- for (size_t I = 0; I < ElemCount; ++I)
- (*MF)[I] = TD[I];
- }
- } else if (Spec.isElementType<float>()) {
- auto *MF = FL.add_feature()->mutable_float_list()->mutable_value();
- MF->Resize(ElemCount, 0.0);
- if (ShouldWrite)
- memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
- } else {
- llvm_unreachable("Unsupported tensor type.");
- }
- if (ShouldWrite)
- CurrentTensor += TensorByteSize;
- }
-}
} // namespace
namespace llvm {
@@ -304,6 +258,76 @@ class TFModelEvaluatorImpl {
bool checkReportAndInvalidate(const TF_Output &Output,
const TensorSpec &OutputSpec);
};
+
+class LoggerDataImpl {
+ const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
+ const TensorSpec RewardSpec;
+
+ tensorflow::SequenceExample SE;
+ std::vector<tensorflow::FeatureList *> FeatureLists;
+ tensorflow::FeatureList *Reward = nullptr;
+
+public:
+ LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
+ auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
+ if (IncludeReward)
+ Reward = &(*FL)[RewardSpec.name()];
+ // Allocate first the map entries, then capture their address. We will not
+ // mutate the set of features after this (i.e. the pointers won't dangle).
+ for (const auto &LFS : LoggedSpecs) {
+ (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
+ }
+ for (const auto &LFS : LoggedSpecs)
+ FeatureLists.push_back(
+ &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
+ }
+
+ void print(raw_ostream &OS) {
+ std::string OutStr;
+ if (ProtobufTextMode)
+ google::protobuf::TextFormat::PrintToString(SE, &OutStr);
+ else
+ OutStr = SE.SerializeAsString();
+
+ OS << OutStr;
+ }
+
+ char *addNewTensor(size_t FeatureID) {
+ const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
+ if (Spec.isElementType<float>()) {
+ auto *RF = FeatureLists[FeatureID]
+ ->add_feature()
+ ->mutable_float_list()
+ ->mutable_value();
+ RF->Resize(Spec.getElementCount(), 0.0);
+ return reinterpret_cast<char *>(RF->mutable_data());
+ } else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) {
+ auto *RF = FeatureLists[FeatureID]
+ ->add_feature()
+ ->mutable_int64_list()
+ ->mutable_value();
+ RF->Resize(Spec.getElementCount(), 0);
+ return reinterpret_cast<char *>(RF->mutable_data());
+ }
+ llvm_unreachable("Unsupported tensor type.");
+ }
+
+ template <typename T> void logReward(T Value) {
+ if (RewardSpec.isElementType<float>())
+ Reward->add_feature()->mutable_float_list()->add_value(Value);
+ else if (RewardSpec.isElementType<int32_t>() ||
+ RewardSpec.isElementType<int64_t>())
+ Reward->add_feature()->mutable_int64_list()->add_value(Value);
+ else
+ llvm_unreachable("Unsupported tensor type.");
+ }
+
+ size_t getNrRecords() const {
+ return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
+ }
+};
} // namespace llvm
TFModelEvaluatorImpl::TFModelEvaluatorImpl(
@@ -448,37 +472,71 @@ TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}
-void Logger::print(raw_ostream &OS) {
- tensorflow::SequenceExample SE;
+Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
+ IncludeReward(IncludeReward),
+ LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
+ IncludeReward)) {}
- if (RawLogData.empty())
- return;
- if (RawLogData[0].empty())
- return;
- size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
- FeatureSpecs[0].Spec.getElementByteSize();
- size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
- if (NumberOfRecords == 0)
- return;
- size_t RewardSize =
- RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
- size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
-
- tensorflow::FeatureLists *FE = SE.mutable_feature_lists();
- for (size_t I = 0; I < FeatureSpecs.size(); ++I)
- writeRawTensorsAsFeatureLists(FE, FeatureSpecs[I], RawLogData[I].data(),
- NumberOfRecords);
-
- if (IncludeReward)
- writeRawTensorsAsFeatureLists(FE, {RewardSpec, None},
- RawLogData.back().data(), NumberOfRecords,
- NumberOfRewards == 1);
- std::string OutStr;
- if (ProtobufTextMode) {
- google::protobuf::TextFormat::PrintToString(SE, &OutStr);
- } else {
- OutStr = SE.SerializeAsString();
+Logger::~Logger() {}
+
+#define LOG_REWARD(NAME, TYPE) \
+ void Logger::log##NAME##Reward(TYPE Value) { \
+ assert(IncludeReward); \
+ LoggerData->logReward(Value); \
}
- OS << OutStr;
+
+LOG_REWARD(Float, float)
+LOG_REWARD(Int32, int32_t)
+LOG_REWARD(Int64, int64_t)
+#undef LOG_REWARD
+
+#define LOG_FINAL_REWARD(NAME, TYPE) \
+ void Logger::log##NAME##FinalReward(TYPE Value) { \
+ assert(RewardSpec.isElementType<TYPE>()); \
+ for (size_t I = 1; I < LoggerData->getNrRecords(); ++I) \
+ log##NAME##Reward(0); \
+ log##NAME##Reward(Value); \
+ }
+
+LOG_FINAL_REWARD(Float, float)
+LOG_FINAL_REWARD(Int32, int32_t)
+LOG_FINAL_REWARD(Int64, int64_t)
+#undef LOG_FINAL_REWARD
+
+void Logger::logFloatValue(size_t FeatureID, const float *Value) {
+ assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
+ logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
+ assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
+ logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
+ assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
+ logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}
+
+void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
+ const auto &Spec = FeatureSpecs[FeatureID].Spec;
+ char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
+ if (Spec.isElementType<int32_t>())
+ for (size_t I = 0; I < Spec.getElementCount(); ++I)
+ (reinterpret_cast<int64_t *>(Buff))[I] =
+ static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
+ else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
+ std::memcpy(Buff, RawData,
+ Spec.getElementCount() * Spec.getElementByteSize());
+ else
+ llvm_unreachable("Unsupported tensor type");
+}
+
+char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
+ return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
+}
+
+void Logger::print(raw_ostream &OS) { LoggerData->print(OS); }
#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp
index a214663379085..1dfa8e10b7384 100644
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@@ -169,14 +169,14 @@ TEST(TFUtilsTest, Logger) {
const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
const int64_t F01[]{2, 3};
- L.logTensorValue(0, F00, 6);
- L.logTensorValue(1, F01, 2);
- L.logReward<float>(3.4);
+ L.logFloatValue(0, F00);
+ L.logInt64Value(1, F01);
+ L.logFloatReward(3.4);
const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
const int64_t F11[]{-2, -3};
- L.logTensorValue(0, F10, 6);
- L.logTensorValue(1, F11, 2);
- L.logReward<float>(-3.0);
+ L.logFloatValue(0, F10);
+ L.logInt64Value(1, F11);
+ L.logFloatReward(-3.0);
std::string Result;
raw_string_ostream OS(Result);
L.print(OS);
@@ -193,6 +193,42 @@ TEST(TFUtilsTest, Logger) {
PROTO_CHECKER("reward", float_list, 1, R1);
}
+TEST(TFUtilsTest, LoggerInt32FeaturesAndReward) {
+ std::vector<LoggedFeatureSpec> Features;
+ Features.push_back(
+ {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
+ Features.push_back({TensorSpec::createSpec<int32_t>("the_int", {2}),
+ std::string("alternate_name")});
+
+ auto Rewards = TensorSpec::createSpec<int32_t>("reward", {1});
+ Logger L(Features, Rewards, true);
+ const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
+ const int32_t F01[]{2, 3};
+
+ L.logFloatValue(0, F00);
+ L.logInt32Value(1, F01);
+ L.logInt32Reward(3);
+ const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
+ const int32_t F11[]{-2, -3};
+ L.logFloatValue(0, F10);
+ L.logInt32Value(1, F11);
+ L.logInt32Reward(-3);
+ std::string Result;
+ raw_string_ostream OS(Result);
+ L.print(OS);
+
+ tensorflow::SequenceExample Expected;
+ EXPECT_TRUE(Expected.ParseFromString(Result));
+ PROTO_CHECKER("the_float", float_list, 0, F00);
+ PROTO_CHECKER("the_float", float_list, 1, F10);
+ PROTO_CHECKER("alternate_name", int64_list, 0, F01);
+ PROTO_CHECKER("alternate_name", int64_list, 1, F11);
+ int32_t R0[]{3};
+ int32_t R1[]{-3};
+ PROTO_CHECKER("reward", int64_list, 0, R0);
+ PROTO_CHECKER("reward", int64_list, 1, R1);
+}
+
TEST(TFUtilsTest, LoggerNoReward) {
std::vector<LoggedFeatureSpec> Features;
Features.push_back(
@@ -205,12 +241,12 @@ TEST(TFUtilsTest, LoggerNoReward) {
const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
const int64_t F01[]{2, 3};
- L.logTensorValue(0, F00, 6);
- L.logTensorValue(1, F01, 2);
+ L.logFloatValue(0, F00);
+ L.logInt64Value(1, F01);
const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
const int64_t F11[]{-2, -3};
- L.logTensorValue(0, F10, 6);
- L.logTensorValue(1, F11, 2);
+ L.logFloatValue(0, F10);
+ L.logInt64Value(1, F11);
std::string Result;
raw_string_ostream OS(Result);
@@ -230,12 +266,12 @@ TEST(TFUtilsTest, LoggerFinalReward) {
auto Rewards = TensorSpec::createSpec<float>("reward", {1});
Logger L(Features, Rewards, true);
- for (size_t I = 0; I < 3; ++I) {
+ for (int64_t I = 0; I < 3; ++I) {
float F = static_cast<float>(I);
- L.logTensorValue(0, &F);
- L.logTensorValue(1, &I);
+ L.logFloatValue(0, &F);
+ L.logInt64Value(1, &I);
}
- L.logFinalReward<float>(3.14);
+ L.logFloatFinalReward(3.14);
std::string Result;
raw_string_ostream OS(Result);
L.print(OS);
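
One behavioral subtlety preserved from the old logFinalReward: the new
log{Float|Int32|Int64}FinalReward back-fills a zero reward for every record
logged so far before appending the final value, so the reward FeatureList
stays the same length as the feature FeatureLists. A self-contained sketch
(the scalar feature specs here are hypothetical, not taken from the patch):

#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/raw_ostream.h"

void logWithFinalReward() {
  std::vector<llvm::LoggedFeatureSpec> Features;
  Features.push_back(
      {llvm::TensorSpec::createSpec<float>("f", {1}), llvm::None});
  Features.push_back(
      {llvm::TensorSpec::createSpec<int64_t>("i", {1}), llvm::None});
  llvm::Logger L(Features, llvm::TensorSpec::createSpec<float>("reward", {1}),
                 /*IncludeReward=*/true);
  for (int64_t I = 0; I < 3; ++I) {
    float F = static_cast<float>(I);
    L.logFloatValue(0, &F);
    L.logInt64Value(1, &I);
  }
  // Expands to logFloatReward(0) for records 0 and 1, then the real value,
  // leaving exactly one reward entry per record.
  L.logFloatFinalReward(3.14);
  L.print(llvm::outs());
}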