[llvm] 70f8d0a - [llvm] Development-mode InlineAdvisor

Mon Jul 20 11:02:10 PDT 2020

Author: Mircea Trofin
Date: 2020-07-20T11:01:56-07:00
New Revision: 70f8d0ac8a34d7a611db14d0662737ba1c3a3273

URL: https://github.com/llvm/llvm-project/commit/70f8d0ac8a34d7a611db14d0662737ba1c3a3273
DIFF: https://github.com/llvm/llvm-project/commit/70f8d0ac8a34d7a611db14d0662737ba1c3a3273.diff

LOG: [llvm] Development-mode InlineAdvisor

Summary:
This is the InlineAdvisor used in 'development' mode. It enables two
scenarios:

 - loading models via a command-line parameter, thus allowing for rapid
 training iteration, where models can be used for the next exploration
 phase without requiring recompiling the compiler. This trades off some
 compilation speed for the added flexibility.

 - collecting training logs, in the form of tensorflow.SequenceExample
 protobufs. We generate these as textual protobufs, which simplifies
 generation and testing. The protobufs may then be readily consumed by a
 tensorflow-based training algorithm.

To speed up training, training logs may also be collected from the
'default' training policy. In that case, this InlineAdvisor does not
use a model.

RFC: http://lists.llvm.org/pipermail/llvm-dev/2020-April/140763.html

Reviewers: jdoerfert, davidxl

Subscribers: mgorny, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83733

Added: 
    llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
    llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
    llvm/test/Transforms/Inline/ML/development-training-log.ll
    llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll

Modified: 
    llvm/include/llvm/Analysis/InlineAdvisor.h
    llvm/lib/Analysis/CMakeLists.txt
    llvm/lib/Analysis/InlineAdvisor.cpp
    llvm/test/Bindings/Go/lit.local.cfg
    llvm/test/Transforms/Inline/inlining-advisor-default.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 3480d93385a8..e262e9a89261 100644

--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -208,6 +208,12 @@ std::unique_ptr<InlineAdvisor>
 getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
 #endif
 
+#ifdef LLVM_HAVE_TF_API
+std::unique_ptr<InlineAdvisor>
+getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
+                          std::function<bool(CallBase &)> GetDefaultAdvice);
+#endif
+
 // Default (manual policy) decision making helper APIs. Shared with the legacy
 // pass manager inliner.
 

diff  --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 703623396d96..a6a267c81368 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -1,6 +1,9 @@
 set(CommonMLSources MLInlineAdvisor.cpp)
 set(ReleaseModeMLSources ReleaseModeModelRunner.cpp)
-set(DevelopmentModeMLSources TFUtils.cpp)
+set(DevelopmentModeMLSources
+  DevelopmentModeInlineAdvisor.cpp 
+  TFUtils.cpp
+  )
 
 if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
   set(MLPolicySources ${CommonMLSources})

diff  --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
new file mode 100644
index 000000000000..954fc49a8663
--- /dev/null
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -0,0 +1,485 @@
+//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a model runner using Tensorflow C APIs, allowing the
+// loading of a model from a command line option.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+#include "llvm/Analysis/MLInlineAdvisor.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<std::string> TrainingLog(
+    "training-log", cl::Hidden,
+    cl::desc("Path where the development - mode inlining log is saved."));
+
+static cl::opt<std::string> TFModelUnderTrainingPath(
+    "ml-inliner-model-under-training", cl::Hidden,
+    cl::desc("Path to SavedModel from the previous training iteration."));
+
+static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
+                                         cl::Hidden, cl::init("action_"),
+                                         cl::desc("Prefix for feature names."));
+
+static cl::opt<std::string> TFDecisionName(
+    "ml-inliner-trained-model-decision-name", cl::Hidden,
+    cl::init("StatefulPartitionedCall"),
+    cl::desc("Name of the graph operation representing the decision."));
+
+namespace {
+/// An InlineEvent, used by TrainingLogger.
+struct InlineEvent {
+  /// What the default policy's decision would have been.
+  bool DefaultDecision = false;
+
+  /// What we advised. When training off the default policy, this is the same as
+  /// DefaultDecision.
+  bool AdvisedDecision = false;
+
+  /// What actually happened. This would be 'false' in the case of an inline
+  /// error, even if AdvisedDecision were true, otherwise it agrees with
+  /// AdvisedDecision.
+  bool Effect = false;
+
+  /// What the change in size was: size_after - size_before
+  int64_t Reward = 0;
+};
+
+/// Collect data we may use for training a model, and write it as a textual
+/// Tensorflow SequenceExample
+/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
+/// protobuf (https://developers.google.com/protocol-buffers).
+/// Because this is a protobuf, we cannot just stream the events as they come.
+/// Internally, TrainingLogger stores data in column-major format, because that
+/// lines up with how TF SequenceExample represents it.
+class TrainingLogger final {
+public:
+  TrainingLogger() {
+    for (size_t I = 0; I < NumberOfFeatures; ++I) {
+      Features.push_back(InlineFeatures());
+    }
+  }
+
+  /// Log one inlining event.
+  void logInlineEvent(const InlineEvent &Event,
+                      const MLModelRunner &ModelRunner) {
+    for (size_t I = 0; I < NumberOfFeatures; ++I) {
+      Features[I].push_back(ModelRunner.getFeature(I));
+    }
+    Decisions.push_back(Event.AdvisedDecision);
+    Effects.push_back(Event.Effect);
+    Rewards.push_back(Event.Reward);
+    DefaultDecisions.push_back(Event.DefaultDecision);
+  }
+
+  void printTensor(raw_fd_ostream &OutFile) {
+    if (DefaultDecisions.empty())
+      return;
+    OutFile << "feature_lists: {\n";
+
+    for (size_t I = 0; I < Features.size(); I++) {
+      writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
+    }
+    writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
+    writeTensor(OutFile, DecisionName, Decisions);
+    writeTensor(OutFile, RewardName, Rewards);
+
+    OutFile << "}\n";
+  }
+
+private:
+  template <typename T>
+  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
+                   const std::vector<T> &Tensor) {
+    OutFile << "  feature_list: {\n";
+    OutFile << "    key: "
+            << "\"" << TensorName << "\" ";
+    OutFile << "value: {\n";
+    for (const auto &Feature : Tensor) {
+      OutFile << "      feature: { int64_list: { value: [" << Feature
+              << "] } }\n";
+    }
+    OutFile << "    }\n";
+    OutFile << "  }\n";
+  }
+
+  std::vector<InlineFeatures> Features;
+  std::vector<bool> DefaultDecisions;
+  std::vector<bool> Decisions;
+  std::vector<bool> Effects;
+  std::vector<int64_t> Rewards;
+  std::vector<bool> Mandatory;
+};
+
+/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
+/// the offline training scenario. Note that training happens outside of the
+/// compiler, this facility is concerned with producing training data ("logs").
+/// This InlineAdvisor can operate in the following modes:
+///
+/// 1) collect logs for the default policy. This is useful for bootstrapping
+/// training, which will be considerably faster by starting from a reasonable
+/// policy.
+///
+/// 2) collect logs for the ML policy, using a model from a previous
+/// training. Potentially, that model uses internally some small random
+/// perturbation of its weights, to induce exploration (setting this up is the
+/// responsibility of the training algorithm). The logs would then be used to
+/// retrain and improve on this model.
+///
+/// 3) use the provided model, with no logging. This is useful for end to end
+/// validation - the model, in this case, is a release candidate and shouldn't
+/// have random perturbations. It is a convenience feature: rather than needing
+/// to take the release candidate model and compile it in 'release' mode,
+/// validate it, then potentially discard it, it's easier to just pass the model
+/// to the compiler, albeit compilation would be slower, as a one-off. Once the
+/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
+/// release mode. The expectation is that a well-trained model provides a good
+/// policy over a sufficiently diverse codebase, over many changes (i.e.
+/// training happens seldom).
+class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
+public:
+  DevelopmentModeMLInlineAdvisor(
+      Module &M, ModuleAnalysisManager &MAM,
+      std::unique_ptr<MLModelRunner> ModelRunner,
+      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);
+
+  size_t getTotalSizeEstimate();
+
+  virtual ~DevelopmentModeMLInlineAdvisor();
+  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
+  void resetNativeSize(Function *F) {
+    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
+  }
+
+  std::unique_ptr<MLInlineAdvice>
+  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
+  std::unique_ptr<MLInlineAdvice>
+  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
+
+  size_t getNativeSizeEstimate(const Function &F) const;
+
+private:
+  bool isLogging() const { return !TrainingLog.empty(); }
+
+  std::function<bool(CallBase &)> GetDefaultAdvice;
+  TrainingLogger Logger;
+  const bool IsDoingInference;
+
+  const int32_t InitialNativeSize;
+  int32_t CurrentNativeSize = 0;
+};
+
+/// A variant of MLInlineAdvice that tracks all non-trivial inlining
+/// decisions, for training/logging.
+class LoggingMLInlineAdvice : public MLInlineAdvice {
+public:
+  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
+                        OptimizationRemarkEmitter &ORE, bool Recommendation,
+                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
+                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
+      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
+        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
+        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
+        DefaultDecision(DefaultDecision) {}
+
+  virtual ~LoggingMLInlineAdvice() = default;
+
+private:
+  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
+    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
+  }
+  void recordInliningImpl() override {
+    MLInlineAdvice::recordInliningImpl();
+    getAdvisor()->resetNativeSize(Caller);
+    int Reward = std::numeric_limits<int>::max();
+    if (!getAdvisor()->isForcedToStop()) {
+      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
+                            CalleeSizeEstimateBefore;
+      Reward = NativeSizeAfter -
+               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
+      getAdvisor()->updateNativeSizeEstimate(Reward);
+    }
+    log(Reward, /*Success=*/true);
+  }
+
+  void recordInliningWithCalleeDeletedImpl() override {
+    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
+    getAdvisor()->resetNativeSize(Caller);
+    if (!getAdvisor()->isForcedToStop()) {
+      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
+      int Reward = NativeSizeAfter -
+                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
+      getAdvisor()->updateNativeSizeEstimate(Reward);
+      log(Reward, /*Success=*/true);
+    }
+  }
+
+  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
+    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
+    log(NoReward, /*Success=*/false);
+  }
+
+  void recordUnattemptedInliningImpl() override {
+    MLInlineAdvice::recordUnattemptedInliningImpl();
+    log(NoReward, /*Success=*/false);
+  }
+
+  void log(int64_t Reward, bool Success) {
+    InlineEvent Event;
+    Event.AdvisedDecision = isInliningRecommended();
+    Event.DefaultDecision = DefaultDecision;
+    Event.Effect = Success;
+    Event.Reward = Reward;
+    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
+  }
+
+  static const int64_t NoReward = 0;
+  TrainingLogger &Logger;
+  const size_t CallerSizeEstimateBefore;
+  const size_t CalleeSizeEstimateBefore;
+  const bool DefaultDecision;
+};
+
+/// A pseudo model runner. We use it to store feature values when collecting
+/// logs for the default policy, but never ask it to 'run'.
+class NoInferenceModelRunner : public MLModelRunner {
+public:
+  NoInferenceModelRunner(LLVMContext &Ctx)
+      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
+  void setFeature(FeatureIndex Index, int64_t Value) override {
+    Features[static_cast<int>(Index)] = Value;
+  }
+
+  int64_t getFeature(int Index) const override { return Features[Index]; }
+  bool run() override {
+    llvm_unreachable("We shouldn't call run on this model runner.");
+  }
+
+private:
+  InlineFeatures Features;
+};
+
+/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
+/// to dynamically load and evaluate a TF SavedModel
+/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
+/// sacrificed for ease of use while training.
+class ModelUnderTrainingRunner final : public MLModelRunner {
+public:
+  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
+
+  bool run() override;
+
+  // Disallows copy and assign.
+  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
+  ModelUnderTrainingRunner &
+  operator=(const ModelUnderTrainingRunner &) = delete;
+
+  void setFeature(FeatureIndex Index, int64_t Value) override;
+  int64_t getFeature(int Index) const override;
+  bool isValid() const { return !!Evaluator; }
+
+private:
+  std::unique_ptr<TFModelEvaluator> Evaluator;
+
+  // The training framework needs some additional features, that just need to
+  // be set to 0.
+  struct TensorSpec {
+    std::string Name;
+    std::function<void(TFModelEvaluator *, size_t Index,
+                       const std::vector<int64_t> &Dim)>
+        Initializer;
+  };
+
+  const std::vector<TensorSpec> TrainingOnlyFeatures{
+      {"inlining_default",
+       [](TFModelEvaluator *Evaluator, size_t Index,
+          const std::vector<int64_t> &Dim) {
+         Evaluator->initInput<int64_t>(Index, Dim);
+       }},
+      {"discount",
+       [](TFModelEvaluator *Evaluator, size_t Index,
+          const std::vector<int64_t> &Dim) {
+         Evaluator->initInput<float>(Index, Dim);
+       }},
+      {"reward",
+       [](TFModelEvaluator *Evaluator, size_t Index,
+          const std::vector<int64_t> &Dim) {
+         Evaluator->initInput<float>(Index, Dim);
+       }},
+      {"step_type", [](TFModelEvaluator *Evaluator, size_t Index,
+                       const std::vector<int64_t> &Dim) {
+         Evaluator->initInput<int32_t>(Index, Dim);
+       }}};
+};
+} // namespace
+
+DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
+    Module &M, ModuleAnalysisManager &MAM,
+    std::unique_ptr<MLModelRunner> ModelRunner,
+    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
+    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
+      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
+      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
+      CurrentNativeSize(InitialNativeSize) {
+  // We cannot have the case of neither inference nor logging.
+  assert(IsDoingInference || isLogging());
+}
+
+DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
+  if (TrainingLog.empty())
+    return;
+  std::error_code ErrorCode;
+  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
+  Logger.printTensor(OutFile);
+}
+
+size_t
+DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
+  auto &R =
+      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
+  if (!R) {
+    F.getParent()->getContext().emitError(
+        "Native size estimator is not present.");
+    return 0;
+  }
+  return *R;
+}
+
+std::unique_ptr<MLInlineAdvice>
+DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
+    CallBase &CB, OptimizationRemarkEmitter &ORE) {
+  if (!isLogging())
+    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
+  return std::make_unique<LoggingMLInlineAdvice>(
+      /*Advisor=*/this,
+      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
+      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
+      /*CalleeSizeEstimateBefore=*/
+      getNativeSizeEstimate(*CB.getCalledFunction()),
+      /*DefaultDecision=*/true);
+}
+
+std::unique_ptr<MLInlineAdvice>
+DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
+    CallBase &CB, OptimizationRemarkEmitter &ORE) {
+  if (IsDoingInference && !isLogging())
+    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
+
+  bool DefaultAdvice = GetDefaultAdvice(CB);
+  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
+  return std::make_unique<LoggingMLInlineAdvice>(
+      /*Advisor=*/this,
+      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
+      /*Logger=*/Logger,
+      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
+      /*CalleeSizeEstimateBefore=*/
+      getNativeSizeEstimate(*CB.getCalledFunction()),
+      /*DefaultDecision=*/DefaultAdvice);
+}
+
+size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
+  size_t Ret = 0;
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    if (isFunctionDeleted(&F))
+      continue;
+    Ret += getNativeSizeEstimate(F);
+  }
+  return Ret;
+}
+
+ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
+                                                   const std::string &ModelPath)
+    : MLModelRunner(Ctx) {
+  std::vector<std::string> InputNames;
+  std::vector<std::string> OutputNames;
+  for (size_t I = 0; I < NumberOfFeatures; ++I)
+    InputNames.push_back(TFFeedPrefix + FeatureNameMap[I]);
+  for (size_t I = 0; I < TrainingOnlyFeatures.size(); ++I)
+    InputNames.push_back(TFFeedPrefix + TrainingOnlyFeatures[I].Name);
+  OutputNames.push_back(TFDecisionName);
+
+  Evaluator =
+      std::make_unique<TFModelEvaluator>(ModelPath, InputNames, OutputNames);
+  if (!Evaluator || !Evaluator->isValid()) {
+    Ctx.emitError("Failed to create inliner saved model evaluator");
+    Evaluator.reset();
+    return;
+  }
+
+  static const std::vector<int64_t> Dim{1};
+
+  size_t InputIndex = 0;
+  for (; InputIndex < NumberOfFeatures; ++InputIndex) {
+    Evaluator->initInput<int64_t>(InputIndex, Dim);
+  }
+
+  for (; InputIndex < InputNames.size(); ++InputIndex) {
+    TrainingOnlyFeatures[InputIndex - NumberOfFeatures].Initializer(
+        Evaluator.get(), InputIndex, Dim);
+  }
+}
+
+bool ModelUnderTrainingRunner::run() {
+  auto ER = Evaluator->evaluate();
+  if (!ER.hasValue()) {
+    Ctx.emitError("Error evaluating model.");
+    return false;
+  }
+  int64_t Decision = *ER->getTensorValue<int64_t>(0);
+  return static_cast<bool>(Decision);
+}
+
+int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
+  return *Evaluator->getInput<int64_t>(Index);
+}
+
+void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
+  size_t NumericIndex = static_cast<size_t>(Index);
+  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
+}
+
+std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
+    Module &M, ModuleAnalysisManager &MAM,
+    std::function<bool(CallBase &)> GetDefaultAdvice) {
+  auto &Ctx = M.getContext();
+  if (TrainingLog.empty() !=
+      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
+    Ctx.emitError("For development mode, if training logs are requested, then "
+                  "a size estimator must be available; either that, or neither "
+                  "are specified.");
+    return nullptr;
+  }
+
+  std::unique_ptr<MLModelRunner> Runner;
+
+  bool IsDoingInference = false;
+  if (TFModelUnderTrainingPath.empty())
+    Runner.reset(new NoInferenceModelRunner(Ctx));
+  else {
+    Runner = std::make_unique<ModelUnderTrainingRunner>(
+        Ctx, TFModelUnderTrainingPath);
+    if (!Runner) {
+      Ctx.emitError("Could not load the policy model from the provided path");
+      return nullptr;
+    }
+    IsDoingInference = true;
+  }
+  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
+      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
+}
\ No newline at end of file

diff  --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index e18f681278d3..ee92450d7e09 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -159,7 +159,13 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
     Advisor.reset(new DefaultInlineAdvisor(FAM, Params));
     break;
   case InliningAdvisorMode::Development:
-    // To be added subsequently under conditional compilation.
+#ifdef LLVM_HAVE_TF_API
+    Advisor =
+        llvm::getDevelopmentModeAdvisor(M, MAM, [&FAM, Params](CallBase &CB) {
+          auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
+          return OIC.hasValue();
+        });
+#endif
     break;
   case InliningAdvisorMode::Release:
 #ifdef LLVM_HAVE_TF_AOT

diff  --git a/llvm/test/Bindings/Go/lit.local.cfg b/llvm/test/Bindings/Go/lit.local.cfg
index e32737079e4a..91a0ad89ab4a 100644
--- a/llvm/test/Bindings/Go/lit.local.cfg
+++ b/llvm/test/Bindings/Go/lit.local.cfg
@@ -9,7 +9,7 @@ if not 'go' in config.root.llvm_bindings:
 if not config.root.include_go_tests:
     config.unsupported = True
 
-if config.have_tf_aot:
+if config.have_tf_aot or config.have_tf_api:
     config.unsupported = True
 
 def find_executable(executable, path=None):

diff  --git a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
new file mode 100644
index 000000000000..73668c39439e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
@@ -0,0 +1,45 @@
+; Test behavior when inlining policy grows size out of control.
+; In all cases, the end result is the same: mandatory inlinings must happen.
+; However, when we discover we 'trip' over the artificially-low size increase 
+; factor, we penalize the 'bad' decision.
+; REQUIRES: have_tf_api
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare i64 @f1()
+
+define i64 @f2() #0 {
+  %r = call i64 @f1()
+  %r2 = add i64 13, %r
+  ret i64 %r2
+}
+
+define i64 @some_function() {
+  %r = call i64 @f1()
+  %r2 = add i64 13, %r
+  ret i64 %r2
+}
+
+define i64 @top() {
+  %r = call i64 @f2()
+  %r2 = call i64 @some_function()
+  %r3 = add i64 %r, %r2
+  ret i64 %r3
+}
+
+attributes #0 = { alwaysinline }
+; CHECK: key: "delta_size" value: {
+; NOBOUNDS-NEXT: feature: { int64_list: { value: [10] } }
+; NOBOUNDS-NEXT: feature: { int64_list: { value: [6] } }
+; BOUNDS-NEXT: feature: { int64_list: { value: [2147483647] } }
+; CHECK-NEXT: }
+; CHECK-LABEL: @top
+; f2 must always be inlined, so we won't find a call to it in @top()
+; CHECK-NOT: call i64 @f2
+; @some-function isn't mandatory, and when we set the increase threshold too low,
+; it won't be inlined.
+; NOBOUNDS-NOT: @some_function
+; BOUNDS: call i64 @some_function
\ No newline at end of file

diff  --git a/llvm/test/Transforms/Inline/ML/development-training-log.ll b/llvm/test/Transforms/Inline/ML/development-training-log.ll
new file mode 100644
index 000000000000..4bf6259f7e48
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/development-training-log.ll
@@ -0,0 +1,51 @@
+; Test that we can produce a log if we have or do not have a model, in development mode.
+; REQUIRES: have_tf_api
+; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s 
+; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare i32 @f1(i32)
+declare i32 @f2(i32)
+
+define dso_local i32 @branches(i32) {
+  %cond = icmp slt i32 %0, 3
+  br i1 %cond, label %then, label %else
+
+then:
+  %ret.1 = call i32 @f1(i32 %0)
+  br label %last.block
+
+else:
+  %ret.2 = call i32 @f2(i32 %0)
+  br label %last.block
+
+last.block:
+  %ret = phi i32 [%ret.1, %then], [%ret.2, %else]
+  ret i32 %ret
+}
+
+define dso_local i32 @top() {
+  %1 = call i32 @branches(i32 2)
+  ret i32 %1
+}
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 7.0.0-6 (tags/RELEASE_700/final)"}
+
+; Check we produce a protobuf that has inlining decisions and rewards.
+; CHECK:     feature_lists: {
+; CHECK:          key: "inlining_decision" value: {
+; CHECK-NEXT:       feature: { int64_list: { value: [1] } }
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT:   feature_list: {
+; CHECK-NEXT:     key: "delta_size" value: {
+; CHECK-NEXT:       feature: { int64_list: { value: [0] } }
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
\ No newline at end of file

diff  --git a/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll
new file mode 100644
index 000000000000..0546c9a23d4e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll
@@ -0,0 +1,10 @@
+; The default inliner doesn't elide @adder, it believes it's too costly to inline 
+; adder into switcher. The ML inliner carries out that inlining, resulting in
+; a smaller result (part of it is that adder gets elided).
+;
+; This test uses Inputs/test-module.ll, as it shares it with a similar test
+; for the 'release' mode.
+;
+; REQUIRES: have_tf_api
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=development -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK

diff  --git a/llvm/test/Transforms/Inline/inlining-advisor-default.ll b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
index 7ee3e176bbea..8a760ff5b3a8 100644
--- a/llvm/test/Transforms/Inline/inlining-advisor-default.ll
+++ b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
@@ -1,6 +1,7 @@
 ; Check that, in the absence of dependencies, we emit an error message when
 ; trying to use ML-driven inlining.
 ; REQUIRES: !have_tf_aot
+; REQUIRES: !have_tf_api
 ; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=development -S < %s 2>&1 | FileCheck %s
 ; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -S < %s 2>&1 | FileCheck %s