[llvm] b51e844 - [NFC][TFUtils] Extract out the output spec loader

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 18 20:18:04 PST 2020


Author: Mircea Trofin
Date: 2020-11-18T20:03:20-08:00
New Revision: b51e844f7a4ca4a0cb976bd59bf8b5588d6f3be5

URL: https://github.com/llvm/llvm-project/commit/b51e844f7a4ca4a0cb976bd59bf8b5588d6f3be5
DIFF: https://github.com/llvm/llvm-project/commit/b51e844f7a4ca4a0cb976bd59bf8b5588d6f3be5.diff

LOG: [NFC][TFUtils] Extract out the output spec loader

It's generic for the 'development mode', not specific to the inliner
case.

Differential Revision: https://reviews.llvm.org/D91751

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/Utils/TFUtils.h
    llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
    llvm/lib/Analysis/TFUtils.cpp
    llvm/unittests/Analysis/TFUtilsTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 0e697a408c3b..16aef19a7fb5 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -100,6 +100,15 @@ class TensorSpec final {
 Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                            const json::Value &Value);
 
+struct LoggedFeatureSpec {
+  TensorSpec Spec;
+  Optional<std::string> LoggingName;
+};
+
+bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName,
+                     StringRef ExpectedDecisionName,
+                     std::vector<LoggedFeatureSpec> &Ret);
+
 /// Logging utility - given an ordered specification of features, and assuming
 /// a scalar reward, allow logging feature values and rewards, and then print
 /// as tf.train.SequenceExample text protobuf.
@@ -121,11 +130,6 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
 /// At the end, call print to generate the protobuf.
 class Logger final {
 public:
-  struct LoggedFeatureSpec {
-    TensorSpec Spec;
-    Optional<std::string> LoggingName;
-  };
-
   /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
   /// be called, and the reward feature won't be printed out.
   Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
@@ -201,6 +205,11 @@ class TFModelEvaluator final {
                    const std::vector<TensorSpec> &InputSpecs,
                    const std::vector<TensorSpec> &OutputSpecs,
                    const char *Tags = "serve");
+  TFModelEvaluator(StringRef SavedModelPath,
+                   const std::vector<TensorSpec> &InputSpecs,
+                   function_ref<TensorSpec(size_t)> GetOutputSpecs,
+                   size_t OutputSpecsSize, const char *Tags = "serve");
+
   ~TFModelEvaluator();
   TFModelEvaluator(const TFModelEvaluator &) = delete;
   TFModelEvaluator(TFModelEvaluator &&) = delete;

diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 793339fcf271..41aa9a0fe2bf 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -298,9 +298,9 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
   int64_t getFeature(int Index) const override;
   bool isValid() const { return !!Evaluator; }
 
-  const std::vector<std::string> &outputNames() const { return OutputNames; }
-
-  const std::vector<TensorSpec> &outputSpecs() const { return OutputSpecs; }
+  const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
+    return OutputSpecs;
+  }
 
   const Optional<TFModelEvaluator::EvaluationResult> &
   lastEvaluationResult() const {
@@ -309,12 +309,9 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
 
 private:
   std::unique_ptr<TFModelEvaluator> Evaluator;
-  std::vector<std::string> OutputNames;
-  std::vector<TensorSpec> OutputSpecs;
+  std::vector<LoggedFeatureSpec> OutputSpecs;
   Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
 
-  bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName);
-
   // The training framework needs some additional features.
   const std::vector<TensorSpec> TrainingOnlyFeatures{
       TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
@@ -329,14 +326,15 @@ TrainingLogger::TrainingLogger(StringRef LogFileName,
     : LogFileName(LogFileName), MUTR(MUTR) {
   // The first output is the inlining decision.
   if (MUTR)
-    OutputCount = MUTR->outputSpecs().size();
-  std::vector<Logger::LoggedFeatureSpec> FT;
+    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
+  std::vector<LoggedFeatureSpec> FT;
 
   for (size_t I = 0; I < NumberOfFeatures; ++I)
     FT.push_back(
         {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
-  for (size_t I = 1; I < OutputCount; ++I)
-    FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]});
+  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
+    FT.insert(FT.end(), MUTR->outputLoggedFeatureSpecs().begin() + 1,
+              MUTR->outputLoggedFeatureSpecs().end());
 
   DefaultDecisionPos = FT.size();
   FT.push_back(
@@ -361,7 +359,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
 
   for (size_t I = 1; I < OutputCount; ++I) {
     const auto &Result = *MUTR->lastEvaluationResult();
-    auto &Spec = MUTR->outputSpecs()[I];
+    auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
     const char *RawData =
         reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
     L->logTensorValue(CurrentFeature, RawData,
@@ -480,11 +478,13 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
     llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
     OutputSpecPath = {OutputSpecsPath.data(), OutputSpecsPath.size()};
   }
-  if (!loadOutputSpecs(Ctx, OutputSpecPath))
+
+  if (!loadOutputSpecs(Ctx, OutputSpecPath, DecisionName, OutputSpecs))
     return;
 
-  Evaluator =
-      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
+  Evaluator = std::make_unique<TFModelEvaluator>(
+      ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
+      OutputSpecs.size());
   if (!Evaluator || !Evaluator->isValid()) {
     Ctx.emitError("Failed to create inliner saved model evaluator");
     Evaluator.reset();
@@ -492,63 +492,6 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
   }
 }
 
-bool ModelUnderTrainingRunner::loadOutputSpecs(LLVMContext &Ctx,
-                                               StringRef FileName) {
-  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
-  if (!BufferOrError) {
-    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
-                  BufferOrError.getError().message());
-    return false;
-  }
-  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
-  if (!ParsedJSONValues) {
-    Ctx.emitError("Could not parse specs file: " + FileName);
-    return false;
-  }
-  auto ValuesArray = ParsedJSONValues->getAsArray();
-  if (!ValuesArray) {
-    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
-                  "logging_name:<name>} dictionaries");
-    return false;
-  }
-
-  for (const auto &Value : *ValuesArray)
-    if (const auto *Obj = Value.getAsObject())
-      if (const auto *SpecPart = Obj->get("tensor_spec"))
-        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
-          if (auto LoggingName = Obj->getString("logging_name")) {
-            if (!TensorSpec->isElementType<int64_t>() &&
-                !TensorSpec->isElementType<int32_t>() &&
-                !TensorSpec->isElementType<float>()) {
-              Ctx.emitError(
-                  "Only int64, int32, and float tensors are supported. "
-                  "Found unsupported type for tensor named " +
-                  TensorSpec->name());
-              return false;
-            }
-            OutputNames.push_back(LoggingName->str());
-            OutputSpecs.push_back(*TensorSpec);
-          }
-
-  if (ValuesArray->size() != OutputNames.size()) {
-    Ctx.emitError(
-        "Unable to parse output spec. It should be a json file containing an "
-        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
-        "with a json object describing a TensorSpec; and a 'logging_name' key, "
-        "which is a string to use as name when logging this tensor in the "
-        "training log.");
-    return false;
-  }
-  assert(OutputNames.size() == OutputSpecs.size());
-  if (OutputNames.empty() || OutputNames[0] != DecisionName) {
-    Ctx.emitError("The first output spec must describe the decision tensor, "
-                  "and must have the logging_name " +
-                  StringRef(DecisionName));
-    return false;
-  }
-  return true;
-}
-
 bool ModelUnderTrainingRunner::run() {
   LastEvaluationResult = Evaluator->evaluate();
   if (!LastEvaluationResult.hasValue()) {

diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index a84e967320a0..52cfe0b43366 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include "tensorflow/c/c_api.h"
@@ -83,7 +84,7 @@ void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
 /// feature name in the output is either the provided LoggingName, if
 /// specified, otherwise it's the name of the tensor (as given by Spec).
 void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
-                                   const Logger::LoggedFeatureSpec &LoggedSpec,
+                                   const LoggedFeatureSpec &LoggedSpec,
                                    const char *TensorData, size_t TensorCount,
                                    bool FinalReward = false) {
   const char *FieldName = "<invalid>";
@@ -215,12 +216,68 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
   return None;
 }
 
+bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName,
+                     StringRef ExpectedDecisionName,
+                     std::vector<LoggedFeatureSpec> &Ret) {
+  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
+  if (!BufferOrError) {
+    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
+                  BufferOrError.getError().message());
+    return false;
+  }
+  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
+  if (!ParsedJSONValues) {
+    Ctx.emitError("Could not parse specs file: " + FileName);
+    return false;
+  }
+  auto ValuesArray = ParsedJSONValues->getAsArray();
+  if (!ValuesArray) {
+    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
+                  "logging_name:<name>} dictionaries");
+    return false;
+  }
+
+  for (const auto &Value : *ValuesArray)
+    if (const auto *Obj = Value.getAsObject())
+      if (const auto *SpecPart = Obj->get("tensor_spec"))
+        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
+          if (auto LoggingName = Obj->getString("logging_name")) {
+            if (!TensorSpec->isElementType<int64_t>() &&
+                !TensorSpec->isElementType<int32_t>() &&
+                !TensorSpec->isElementType<float>()) {
+              Ctx.emitError(
+                  "Only int64, int32, and float tensors are supported. "
+                  "Found unsupported type for tensor named " +
+                  TensorSpec->name());
+              return false;
+            }
+            Ret.push_back({*TensorSpec, LoggingName->str()});
+          }
+
+  if (ValuesArray->size() != Ret.size()) {
+    Ctx.emitError(
+        "Unable to parse output spec. It should be a json file containing an "
+        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
+        "with a json object describing a TensorSpec; and a 'logging_name' key, "
+        "which is a string to use as name when logging this tensor in the "
+        "training log.");
+    return false;
+  }
+  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
+    Ctx.emitError("The first output spec must describe the decision tensor, "
+                  "and must have the logging_name " +
+                  StringRef(ExpectedDecisionName));
+    return false;
+  }
+  return true;
+}
+
 class TFModelEvaluatorImpl {
 public:
   TFModelEvaluatorImpl(StringRef SavedModelPath,
                        const std::vector<TensorSpec> &InputSpecs,
-                       const std::vector<TensorSpec> &OutputSpecs,
-                       const char *Tags);
+                       function_ref<TensorSpec(size_t)> GetOutputSpecs,
+                       size_t OutputSpecsSize, const char *Tags);
 
   bool isValid() const { return IsValid; }
   size_t OutputSize() const { return OutputFeed.size(); }
@@ -271,10 +328,11 @@ class TFModelEvaluatorImpl {
 
 TFModelEvaluatorImpl::TFModelEvaluatorImpl(
     StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
-    const std::vector<TensorSpec> &OutputSpecs, const char *Tags)
+    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
+    const char *Tags = "serve")
     : Graph(createTFGraph()), Options(createTFSessionOptions()),
       InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
-      OutputFeed(OutputSpecs.size()) {
+      OutputFeed(OutputSpecsSize) {
   if (!ensureInitTF()) {
     errs() << "Tensorflow should have been initialized";
     return;
@@ -298,8 +356,8 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
     initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
               InputSpec.shape());
   }
-  for (size_t I = 0; I < OutputSpecs.size(); ++I) {
-    auto &OutputSpec = OutputSpecs[I];
+  for (size_t I = 0; I < OutputSpecsSize; ++I) {
+    auto OutputSpec = GetOutputSpecs(I);
     OutputFeed[I] = {
         TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
         OutputSpec.port()};
@@ -308,15 +366,23 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
   }
 }
 
+TFModelEvaluator::TFModelEvaluator(
+    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
+    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
+    const char *Tags)
+    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
+                                    OutputSpecsSize, Tags)) {
+  if (!Impl->isValid())
+    Impl.reset();
+}
+
 TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                    const std::vector<TensorSpec> &InputSpecs,
                                    const std::vector<TensorSpec> &OutputSpecs,
                                    const char *Tags)
-    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, OutputSpecs,
-                                    Tags)) {
-  if (!Impl->isValid())
-    Impl.reset();
-}
+    : TFModelEvaluator(
+          SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
+          OutputSpecs.size(), Tags) {}
 
 TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
   for (auto *T : Input) {

diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp
index f07bc27e057c..1cd64f15e288 100644
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@@ -144,7 +144,7 @@ TEST(TFUtilsTest, TensorSpecSizesAndTypes) {
 }
 
 TEST(TFUtilsTest, Logger) {
-  std::vector<Logger::LoggedFeatureSpec> Features;
+  std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
       {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
   Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
@@ -191,7 +191,7 @@ TEST(TFUtilsTest, Logger) {
 }
 
 TEST(TFUtilsTest, LoggerNoReward) {
-  std::vector<Logger::LoggedFeatureSpec> Features;
+  std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
       {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
   Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
@@ -230,7 +230,7 @@ TEST(TFUtilsTest, LoggerNoReward) {
 }
 
 TEST(TFUtilsTest, LoggerFinalReward) {
-  std::vector<Logger::LoggedFeatureSpec> Features;
+  std::vector<LoggedFeatureSpec> Features;
   Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
   Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
 


        


More information about the llvm-commits mailing list