[llvm] [llvm]Add a simple Telemetry framework (PR #102323)

Vy Nguyen via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 06:35:06 PDT 2024


================
@@ -0,0 +1,687 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <atomic>
+#include <chrono>
+#include <ctime>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+// Per-test knobs and capture areas.
+// Each test fills in its own TestContext to force certain outcomes; since
+// the tests may run in parallel, no TestContext is shared between tests.
+struct TestContext {
+  // When true, the simulated tool reports an error on exit, and ExitMsg is
+  // the message/description attached to that error.
+  bool HasExitError = false;
+  std::string ExitMsg;
+
+  // When true, a vendor-provided config for Telemetry is applied.
+  bool HasVendorConfig = false;
+
+  // When true, the destinations sanitize the data before emitting it.
+  bool SanitizeData = false;
+
+  // These two fields store the data emitted by the framework so that the
+  // tests can verify it afterwards: Buffer is handed to the string
+  // destination, EmittedJsons collects the objects sent by the JSON one.
+  std::string Buffer;
+  std::vector<llvm::json::Object> EmittedJsons;
+
+  // The Uuid that the fake tool is expected to use.
+  std::string ExpectedUuid;
+};
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generates a unique (but deterministic) "uuid" for testing purposes.
+// Successive calls return "1111", "1112", ... as decimal strings.
+static std::string nextUuid() {
+  // This is a plain counter: no other data is published through it, so a
+  // relaxed atomic increment is all that is needed.
+  static std::atomic<int> NextId{1111};
+  return std::to_string(NextId.fetch_add(1, std::memory_order_relaxed));
+}
+
+// Kind tags for the vendor-defined entries.
+// Startup and Exit keep every bit of VendorCommon set, so a mask test of the
+// form `(Kind & VendorCommon) == VendorCommon` accepts them as well.
+struct VendorEntryKind {
+  // constexpr static members are implicitly inline in C++17, so these can be
+  // ODR-used (e.g. bound to a const reference) without a separate
+  // out-of-class definition.
+  static constexpr KindType VendorCommon = 168; // 0b010101000
+  static constexpr KindType Startup = 169;      // 0b010101001
+  static constexpr KindType Exit = 170;         // 0b010101010
+};
+
+// Describes the exit signal of an event.
+// This is used by TelemetryInfo below.
+// Kept as a plain aggregate so it can be built with `{Code, Description}`.
+struct ExitDescription {
+  // Exit code of the tool; defaults to 0 so that a default-constructed
+  // description never carries an indeterminate value.
+  int ExitCode = 0;
+  // Free-form description accompanying the exit code.
+  std::string Description;
+};
+
+// Convenience alias for the timestamps of the various events.
+// EventStats below is expressed in terms of it.
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+// Time (and, possibly later, memory) statistics of an event.
+struct EventStats {
+  // REQUIRED: the time at which the event started.
+  SteadyTimePoint Start;
+  // OPTIONAL: the time at which the event ended - may be left empty when an
+  // end time is not meaningful.
+  std::optional<SteadyTimePoint> End;
+  // TBD: some memory stats could be added here too.
+
+  EventStats() = default;
+  EventStats(SteadyTimePoint Start) : Start{Start} {}
+  EventStats(SteadyTimePoint Start, SteadyTimePoint End)
+      : Start{Start}, End{End} {}
+};
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization methods.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+  // LLVM-style RTTI hook used by isa<>/dyn_cast<>.
+  // Returns false for a null pointer; otherwise true when every bit of
+  // VendorEntryKind::VendorCommon is set in the entry's kind.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    // Subclasses of this type are also acceptable.
+    return (T->getKind() & VendorEntryKind::VendorCommon) ==
+           VendorEntryKind::VendorCommon;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::VendorCommon; }
+
+  // Writes a textual form of this entry to OS; each concrete entry type
+  // provides its own format.
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+
+  // Set only when the event ended with an exit code/description to report.
+  std::optional<ExitDescription> ExitDesc;
+  EventStats Stats;
+  // Zero-initialized so that an entry constructed (or copied) without going
+  // through a telemeter never holds an indeterminate value.
+  size_t Counter = 0;
+};
+
+// Entry describing the start of the tool.
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  StartupEvent(const StartupEvent &E) = default;
+
+  // LLVM-style RTTI hook: matches only non-null entries whose kind is
+  // exactly VendorEntryKind::Startup.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::Startup; }
+
+  // Writes the session id and the startup message, one "Key:Value" per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n"
+       << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  // Returns an object whose single "Startup" member is an array of
+  // [key, value] pairs for the session id and the startup message.
+  json::Object serializeToJson() const override {
+    json::Array Fields = json::Array{
+        {"SessionId", SessionId},
+        {"MagicStartupMsg", MagicStartupMsg},
+    };
+    return json::Object{
+        {"Startup", std::move(Fields)},
+    };
+  }
+};
+
+// Entry describing the exit of the tool.
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Provide a copy ctor because we may need to make a copy
+  // before sanitizing the Entry.
+  ExitEvent(const ExitEvent &E) = default;
+
+  // LLVM-style RTTI hook: matches only non-null entries whose kind is
+  // exactly VendorEntryKind::Exit.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == VendorEntryKind::Exit;
+  }
+
+  // Declared with KindType, like the other getKind() overrides in this file,
+  // rather than spelling out the underlying integer type.
+  KindType getKind() const override { return VendorEntryKind::Exit; }
+
+  // Writes the session id, the exit code (only when ExitDesc is set) and the
+  // exit message, one "Key:Value" per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    if (ExitDesc.has_value())
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  // Returns an object whose single "Exit" member is an array of
+  // [key, value] pairs: the session id, the exit message and, only when
+  // ExitDesc is set, the exit code.
+  json::Object serializeToJson() const override {
+    json::Array I = json::Array{
+        {"SessionId", SessionId},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc.has_value())
+      I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object{
+        {"Exit", std::move(I)},
+    };
+  }
+};
+
+// Entry carrying a list of free-form messages, recorded at a custom
+// (vendor-defined) point of the tool's execution.
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) = default;
+
+  // LLVM-style RTTI hook. Without it, isa<>/dyn_cast<> would fall back to
+  // the inherited VendorCommonTelemetryInfo::classof, which also accepts
+  // StartupEvent and ExitEvent. This type does not override getKind(), so
+  // its entries report exactly VendorEntryKind::VendorCommon.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == VendorEntryKind::VendorCommon;
+  }
+
+  // Writes the session id followed by one "MSG_<index>:<message>" line per
+  // message, indices starting at 0.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      OS << "MSG_" << I << ":" << M << "\n";
+      ++I;
+    }
+  }
+
+  // Returns an object whose single "Midpoint" member is an object holding
+  // the session id plus one "MSG_<index>" member per message.
+  json::Object serializeToJson() const override {
+    json::Object Inner;
+    Inner.try_emplace("SessionId", SessionId);
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+      ++I;
+    }
+
+    return json::Object{{"Midpoint", std::move(Inner)}};
+  }
+};
+
+// The following classes demonstrate how downstream code can
+// define one or more custom Destination(s) to handle
+// Telemetry data differently, specifically:
+//    + which data to send (fullset or sanitized)
+//    + where to send the data
+//    + in what form
+
+// Names of the two destinations defined below. Each destination returns its
+// constant from name(), and TestTelemeter::createInstance compares the
+// entries of Config::AdditionalDestinations against them.
+static constexpr llvm::StringLiteral STRING_DEST("STRING");
+static constexpr llvm::StringLiteral JSON_DEST("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Buf: the string that receives the emitted text (written through OS).
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+  // Serializes Entry as text into the output string and returns success.
+  // Entries that are not vendor entries only have their session id emitted.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "SessionId:" << Entry->SessionId << "\n";
+      return Error::success();
+    }
+
+    // Either the full set was requested, or there is nothing to sanitize
+    // for this type of data, so keep as-is.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+      E->serializeToStream(OS);
+      return Error::success();
+    }
+
+    // Use the checked result of dyn_cast<> directly instead of testing with
+    // isa<> and then dereferencing a second, unchecked dyn_cast<>.
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E)) {
+      sanitizeFields(Custom).serializeToStream(OS);
+      return Error::success();
+    }
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return STRING_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away. Indexing with size_t avoids
+    // narrowing the container size to int.
+    for (size_t I = 1, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+    return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Ctxt: the test context whose EmittedJsons vector receives the entries.
+  JsonStreamDestination(bool ShouldSanitize, TestContext *Ctxt)
+      : ShouldSanitize(ShouldSanitize), CurrentContext(Ctxt) {}
+
+  // Serializes Entry to JSON and hands it to SendToBlackbox.
+  // Entries that are not vendor entries only have their session id emitted.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's ID
+      return SendToBlackbox(json::Object{{"SessionId", Entry->SessionId}});
+    }
+
+    // Either the full set was requested, or there is nothing to sanitize
+    // for this type of data, so keep as-is.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E))
+      return SendToBlackbox(E->serializeToJson());
+
+    // Use the checked result of dyn_cast<> directly instead of testing with
+    // isa<> and then dereferencing a second, unchecked dyn_cast<>.
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E))
+      return SendToBlackbox(sanitizeFields(Custom).serializeToJson());
+
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return JSON_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away. Indexing with size_t avoids
+    // narrowing the container size to int.
+    for (size_t I = 0, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    CurrentContext->EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+  bool ShouldSanitize;
+  TestContext *CurrentContext;
+};
+
+// Custom vendor-defined Telemeter that has an additional data-collection
+// point (atMidpoint) besides the startup and exit hooks.
+class TestTelemeter : public Telemeter {
+public:
+  // SessionId becomes the Uuid that makeDefaultTelemetryInfo() stamps on
+  // every entry it creates.
+  TestTelemeter(std::string SessionId)
+      : Uuid(std::move(SessionId)), Counter(0) {}
+
+  // Returns nullptr when telemetry is disabled in the given config.
+  // Otherwise creates a telemeter with a fresh uuid (also stored in
+  // CurrentContext->ExpectedUuid) and adds one Destination for every name
+  // listed in config->AdditionalDestinations.
+  static std::unique_ptr<TestTelemeter>
+  createInstance(Config *config, TestContext *CurrentContext) {
+    if (!config->EnableTelemetry)
+      return nullptr;
+    CurrentContext->ExpectedUuid = nextUuid();
+    std::unique_ptr<TestTelemeter> Telemeter =
+        std::make_unique<TestTelemeter>(CurrentContext->ExpectedUuid);
+    // Set up Destination based on the given config.
+    for (const std::string &Dest : config->AdditionalDestinations) {
+      // The destination(s) are ALSO defined by vendor, so it should understand
+      // what the name of each destination signifies.
+      if (llvm::StringRef(Dest) == JSON_DEST) {
+        Telemeter->addDestination(
+            std::make_unique<vendor_code::JsonStreamDestination>(
+                CurrentContext->SanitizeData, CurrentContext));
+      } else if (llvm::StringRef(Dest) == STRING_DEST) {
+        Telemeter->addDestination(
+            std::make_unique<vendor_code::StringDestination>(
+                CurrentContext->SanitizeData, CurrentContext->Buffer));
+      } else {
+        llvm_unreachable(
+            llvm::Twine("unknown destination: ", Dest).str().c_str());
+      }
+    }
+    Telemeter->CurrentContext = CurrentContext;
+    return Telemeter;
+  }
+
+  // Remembers the tool's name, lets the vendor decorate a StartupEvent, then
+  // emits the entry to every destination.
+  void atStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    ToolName = ToolPath.str();
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *S = dyn_cast<StartupEvent>(Entry)) {
+      S->MagicStartupMsg = llvm::Twine("Startup_", ToolPath).str();
+    }
+    emitToDestinations(Entry);
+  }
+
+  // Lets the vendor decorate an ExitEvent, then emits the entry to every
+  // destination. ToolPath must match the name recorded by atStartup().
+  void atExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    // Ensure we're shutting down the same tool we started with.
+    if (ToolPath != ToolName) {
+      std::string Str;
+      raw_string_ostream OS(Str);
+      // Note the space after "name", so the tool name is not glued to it.
+      OS << "Expected tool with name " << ToolName << ", but got " << ToolPath;
+      llvm_unreachable(Str.c_str());
+    }
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *E = dyn_cast<ExitEvent>(Entry)) {
+      E->MagicExitMsg = llvm::Twine("Exit_", ToolPath).str();
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  void addDestination(std::unique_ptr<Destination> Dest) override {
+    Destinations.push_back(std::move(Dest));
+  }
+
+  // Vendor-specific collection point: appends three fixed messages to a
+  // CustomTelemetryEvent, then emits the entry to every destination.
+  void atMidpoint(TelemetryInfo *Entry) {
+    // The custom Telemeter can record and send additional data.
+    if (auto *C = dyn_cast<CustomTelemetryEvent>(Entry)) {
+      C->Msgs.push_back("Two");
+      C->Msgs.push_back("Deux");
+      C->Msgs.push_back("Zwei");
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  const std::string &getUuid() const { return Uuid; }
+
+  ~TestTelemeter() = default;
+
+  // Creates an entry of type T with its SessionId set to this telemeter's
+  // Uuid and its Counter set to the next value of the running counter.
+  template <typename T> T makeDefaultTelemetryInfo() {
+    T Ret;
+    Ret.SessionId = Uuid;
+    Ret.Counter = Counter++;
+    return Ret;
+  }
+
+  TestContext *CurrentContext = nullptr;
+
+private:
+  // Sends Entry to every registered destination, in registration order.
+  void emitToDestinations(TelemetryInfo *Entry) {
+    for (const auto &Dest : Destinations) {
+      // A failed llvm::Error has to be handled before it is destroyed;
+      // merely testing it is not enough. There is nowhere to log in this
+      // test telemeter, so drop the error explicitly and move on.
+      if (llvm::Error Err = Dest->emitEntry(Entry))
+        llvm::consumeError(std::move(Err));
+    }
+  }
+
+  const std::string Uuid;
+  size_t Counter;
+  std::string ToolName;
+  std::vector<std::unique_ptr<Destination>> Destinations;
+};
+
+// Pretend to be a "weakly" defined vendor-specific function.
+// The only setting this fake vendor changes is to enable telemetry in the
+// given config.
+void ApplyVendorSpecificConfigs(Config *config) {
+  config->EnableTelemetry = true;
+}
+
+} // namespace vendor_code
+} // namespace telemetry
+} // namespace llvm
+
+namespace {
+
+// Placeholder for the settings shared by every configuration of the upstream
+// tool. It currently leaves the given config untouched.
+void ApplyCommonConfig(llvm::telemetry::Config *config) {
+  // Any shareable configs for the upstream tool can go here.
+  // .....
+}
+
+// Builds the config used by a test: telemetry starts out disabled, the
+// common settings are applied, and the vendor settings are applied on top
+// only when the test context says a vendor config is present.
+std::shared_ptr<llvm::telemetry::Config>
+GetTelemetryConfig(TestContext *CurrentContext) {
+  auto Cfg = std::make_shared<llvm::telemetry::Config>();
+  // Telemetry is disabled by default; the vendor can enable it in their
+  // config.
+  Cfg->EnableTelemetry = false;
+  ApplyCommonConfig(Cfg.get());
+
+  // Apply vendor specific config, if present.
+  // In principle, this would be a build-time param, configured by the vendor.
+  // Eg:
+  //
+  // #ifdef HAS_VENDOR_TELEMETRY_CONFIG
+  //     llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Cfg.get());
+  // #endif
+  //
+  // But for unit testing, we use the testing params defined at the top.
+  if (CurrentContext->HasVendorConfig)
+    llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Cfg.get());
+
+  return Cfg;
+}
+
+using namespace llvm;
+using namespace llvm::telemetry;
+
+// To keep the tests deterministic, the important time-points are predefined
+// here rather than taken from now().
+//
+// The tool starts at EPOCH.
+std::chrono::time_point<std::chrono::steady_clock> StartTime{};
+// Pretend the tool's initialization takes 5 milliseconds, i.e. it completes
+// at EPOCH + 5 milliseconds.
+auto InitCompleteTime = StartTime + std::chrono::milliseconds(5);
+// The midpoint event starts at EPOCH + 10 milliseconds and takes 5
+// milliseconds to complete.
+auto MidPointTime = StartTime + std::chrono::milliseconds(10);
+auto MidPointCompleteTime = MidPointTime + std::chrono::milliseconds(5);
+// The tool starts exiting at EPOCH + 20 milliseconds.
+auto ExitTime = StartTime + std::chrono::milliseconds(20);
+// Pretend that tearing down the tool takes 10 milliseconds to complete.
+auto ExitCompleteTime = ExitTime + std::chrono::milliseconds(10);
+
+// Simulates the tool's startup hook: builds a StartupEvent covering
+// [StartTime, InitCompleteTime] and hands it to the telemeter.
+void AtToolStart(std::string ToolName, vendor_code::TestTelemeter *T) {
+  auto Entry = T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
+  Entry.Stats = vendor_code::EventStats(StartTime, InitCompleteTime);
+  T->atStartup(ToolName, &Entry);
+}
+
+// Simulates the tool's exit hook: builds an ExitEvent covering
+// [ExitTime, ExitCompleteTime], attaches exit code 1 plus the context's exit
+// message when the test asked for an exit error, and hands the entry to the
+// telemeter.
+void AtToolExit(std::string ToolName, vendor_code::TestTelemeter *T) {
+  auto Entry = T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
+  Entry.Stats = vendor_code::EventStats(ExitTime, ExitCompleteTime);
+
+  const TestContext *Ctx = T->CurrentContext;
+  if (Ctx->HasExitError)
+    Entry.ExitDesc = vendor_code::ExitDescription{1, Ctx->ExitMsg};
+
+  T->atExit(ToolName, &Entry);
+}
+
+// Simulates the vendor-specific midpoint hook: builds a CustomTelemetryEvent
+// covering [MidPointTime, MidPointCompleteTime] and hands it to the
+// telemeter.
+void AtToolMidPoint(vendor_code::TestTelemeter *T) {
+  auto Entry = T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
+  Entry.Stats = vendor_code::EventStats(MidPointTime, MidPointCompleteTime);
+  T->atMidpoint(&Entry);
+}
+
+// Without the vendor's config, telemetry is not enabled by default, so no
+// telemeter instance gets created.
+TEST(TelemetryTest, TelemetryDefault) {
+  // Preset some test params: no vendor config for this test.
+  TestContext Context;
+  Context.HasVendorConfig = false;
+
+  std::shared_ptr<llvm::telemetry::Config> Cfg = GetTelemetryConfig(&Context);
+  std::unique_ptr<vendor_code::TestTelemeter> Tool =
+      vendor_code::TestTelemeter::createInstance(Cfg.get(), &Context);
+
+  EXPECT_EQ(nullptr, Tool.get());
+}
+
+TEST(TelemetryTest, TelemetryEnabled) {
+  const std::string ToolName = "TelemetryTest";
+
+  // Preset some test params.
+  TestContext Context;
+  Context.HasVendorConfig = true;
+  Context.SanitizeData = false;
+  Context.Buffer.clear();
+  Context.EmittedJsons.clear();
+  TestContext *CurrentContext = &Context;
+
+  std::shared_ptr<llvm::telemetry::Config> Config =
+      GetTelemetryConfig(CurrentContext);
+
+  // Add some destinations
+  Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST.str());
+  Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST.str());
+
+  auto Tool =
+      vendor_code::TestTelemeter::createInstance(Config.get(), CurrentContext);
+
+  AtToolStart(ToolName, Tool.get());
+  AtToolMidPoint(Tool.get());
+  AtToolExit(ToolName, Tool.get());
+
+  // Check that the Tool uses the expected UUID.
+  EXPECT_STREQ(Tool->getUuid().c_str(), CurrentContext->ExpectedUuid.c_str());
+
+  // Check that the StringDestination emitted properly
+  {
+    std::string ExpectedBuffer =
+        ("SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+         "MagicStartupMsg:Startup_" + llvm::Twine(ToolName) + "\n" +
+         "SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+         "MSG_0:Two\n" + "MSG_1:Deux\n" + "MSG_2:Zwei\n" +
+         "SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+         "MagicExitMsg:Exit_" + llvm::Twine(ToolName) + "\n")
+            .str();
+
+    EXPECT_STREQ(ExpectedBuffer.c_str(), CurrentContext->Buffer.c_str());
+  }
+
+  // Check that the JsonDestination emitted properly
+  {
+
+    // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
+    EXPECT_EQ(static_cast<size_t>(3), CurrentContext->EmittedJsons.size());
+
+    const json::Value *StartupEntry =
+        CurrentContext->EmittedJsons[0].get("Startup");
+    ASSERT_NE(StartupEntry, nullptr);
+    llvm::Expected<json::Value> ExpectedStartup = json::parse(
+        ("[[\"SessionId\",\"" + llvm::Twine(CurrentContext->ExpectedUuid) +
+         "\"],[\"MagicStartupMsg\",\"Startup_" + llvm::Twine(ToolName) + "\"]]")
+            .str());
+    ASSERT_TRUE((bool)ExpectedStartup);
+    EXPECT_EQ(ExpectedStartup.get(), *StartupEntry);
+
+    const json::Value *MidpointEntry =
+        CurrentContext->EmittedJsons[1].get("Midpoint");
+    ASSERT_NE(MidpointEntry, nullptr);
+    llvm::Expected<json::Value> ExpectedMidpoint =
+        json::parse(("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\","
+                     "\"SessionId\":\"" +
+                     llvm::Twine(CurrentContext->ExpectedUuid) + "\"}")
+                        .str());
----------------
oontvoo wrote:

The string literal was (IMO) a bit easier and faster to read when checking what the expected data should be.

https://github.com/llvm/llvm-project/pull/102323


More information about the llvm-commits mailing list