[llvm] [llvm]Add a simple Telemetry framework (PR #102323)

Pavel Labath via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 01:04:37 PDT 2024


================
@@ -0,0 +1,687 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <ctime>
+#include <vector>
+
+// Per-test parameters and captured output.
+// Every test populates its own TestContext to force a particular outcome;
+// the tests may run in parallel, so each one gets a separate instance.
+struct TestContext {
+  // Whether there should be an error on exit and, if so, the exit
+  // message/description that is expected.
+  bool HasExitError{false};
+  std::string ExitMsg;
+
+  // Whether a vendor-provided config for Telemetry is present.
+  bool HasVendorConfig{false};
+
+  // Whether the data should be sanitized.
+  bool SanitizeData{false};
+
+  // These two fields hold the data emitted by the framework so that the
+  // tests can verify it later.
+  std::string Buffer;
+  std::vector<llvm::json::Object> EmittedJsons;
+
+  // The Uuid that the fake tool is expected to generate.
+  std::string ExpectedUuid;
+};
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generates a unique (but deterministic) "uuid" for testing purposes: each
+// call returns the next value of a counter that starts at 1111.
+static std::string nextUuid() {
+  static std::atomic<int> NextId{1111};
+  const int Current = NextId.fetch_add(1, std::memory_order_acquire);
+  return std::to_string(Current);
+}
+
+// Kinds of the vendor-defined entries.
+// The bits of VendorCommon are contained in every other kind listed here,
+// which is what lets VendorCommonTelemetryInfo::classof() recognise its
+// subclasses with a mask check.
+struct VendorEntryKind {
+  // constexpr static members are implicitly inline in C++17, so they can be
+  // bound to a const reference (as gtest comparison macros do) without
+  // needing a separate out-of-class definition.
+  static constexpr KindType VendorCommon = 168; // 0b010101000
+  static constexpr KindType Startup = 169;      // 0b010101001
+  static constexpr KindType Exit = 170;         // 0b010101010
+};
+
+// Describes the exit signal of an event.
+// This is used by VendorCommonTelemetryInfo below.
+struct ExitDescription {
+  // Exit code of the event; zero unless set explicitly, so that a
+  // default-constructed description never carries an indeterminate value.
+  int ExitCode = 0;
+  // Free-form description accompanying the exit code.
+  std::string Description;
+};
+
+// Convenience alias for event timestamps, measured on the steady clock.
+// This is used by the EventStats below.
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+// Time statistics of an event (memory statistics may be added later).
+struct EventStats {
+  // REQUIRED: the point in time at which the event started.
+  SteadyTimePoint Start;
+  // OPTIONAL: the point in time at which the event ended - may be empty if
+  // not meaningful.
+  std::optional<SteadyTimePoint> End;
+  // TBD: could add some memory stats here too?
+
+  EventStats() = default;
+  EventStats(SteadyTimePoint StartTime) : Start{StartTime} {}
+  EventStats(SteadyTimePoint StartTime, SteadyTimePoint EndTime)
+      : Start{StartTime}, End{EndTime} {}
+};
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization methods.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+  // LLVM-style RTTI hook. Accepts the VendorCommon kind as well as every kind
+  // whose bits include it, so subclasses of this struct are accepted too.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return (T->getKind() & VendorEntryKind::VendorCommon) ==
+           VendorEntryKind::VendorCommon;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::VendorCommon; }
+
+  // Serializes the entry to OS; implemented by each concrete event.
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+
+  std::optional<ExitDescription> ExitDesc;
+  EventStats Stats;
+  // Zero-initialized so that copying an entry (e.g. before sanitizing it)
+  // never reads an indeterminate value.
+  size_t Counter = 0;
+};
+
+// Entry emitted for the start-up of the tool; carries one extra message.
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  StartupEvent(const StartupEvent &) = default;
+
+  // LLVM-style RTTI hook: true only for non-null entries of the Startup kind.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::Startup; }
+
+  // Writes the session id and the start-up message to OS, one per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n"
+       << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  // Returns an object with a single "Startup" key that carries the session
+  // id and the start-up message.
+  json::Object serializeToJson() const override {
+    return json::Object{
+        {"Startup",
+         {{"SessionId", SessionId}, {"MagicStartupMsg", MagicStartupMsg}}},
+    };
+  }
+};
+
+// Entry emitted for the exit of the tool; carries one extra message and,
+// through the inherited ExitDesc, an optional exit code.
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Provide a copy ctor because we may need to make a copy
+  // before sanitizing the Entry.
+  ExitEvent(const ExitEvent &E) = default;
+
+  // LLVM-style RTTI hook: true only for non-null entries of the Exit kind.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == VendorEntryKind::Exit;
+  }
+
+  // Spelled with KindType, like VendorCommonTelemetryInfo and StartupEvent.
+  KindType getKind() const override { return VendorEntryKind::Exit; }
+
+  // Writes the session id, the exit code (only when ExitDesc is set) and the
+  // exit message to OS, one per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    if (ExitDesc.has_value())
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  // Returns an object with a single "Exit" key whose value is a json::Array
+  // carrying the session id, the exit message and, when ExitDesc is set, the
+  // exit code.
+  json::Object serializeToJson() const override {
+    json::Array I = json::Array{
+        {"SessionId", SessionId},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc.has_value())
+      I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object{
+        {"Exit", std::move(I)},
+    };
+  }
+};
+
+// Entry that carries an arbitrary list of messages.
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) = default;
+
+  // This event has no kind of its own and reports the inherited VendorCommon
+  // kind, so match that kind exactly. Without this, the inherited classof()
+  // would also accept StartupEvent and ExitEvent, and isa<>/dyn_cast<> would
+  // allow them to be downcast to this unrelated type.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == VendorEntryKind::VendorCommon;
+  }
+
+  // Writes the session id followed by one "MSG_<index>:<message>" line per
+  // message to OS.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      OS << "MSG_" << I << ":" << M << "\n";
+      ++I;
+    }
+  }
+
+  // Returns an object with a single "Midpoint" key whose value is an object
+  // holding the session id and one "MSG_<index>" member per message.
+  json::Object serializeToJson() const override {
+    json::Object Inner;
+    Inner.try_emplace("SessionId", SessionId);
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+      ++I;
+    }
+
+    return json::Object{{"Midpoint", std::move(Inner)}};
+  }
+};
+
+// The following classes demonstrate how downstream code can define one or
+// more custom Destination(s) to handle Telemetry data differently:
+//    + which data to send (full set or sanitized)
+//    + where to send the data
+//    + in what form
+
+// Names by which Config::AdditionalDestinations selects a Destination.
+static constexpr llvm::StringLiteral STRING_DEST("STRING");
+static constexpr llvm::StringLiteral JSON_DEST("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Buf: the string that the entries are written to.
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+  // Serializes Entry to the string stream and returns success.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    // A single dyn_cast<> replaces the former isa<> + dyn_cast<> pair.
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "SessionId:" << Entry->SessionId << "\n";
+      return Error::success();
+    }
+
+    // Emit as-is when sanitization is off, or when there is nothing to
+    // sanitize for this type of data.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+      E->serializeToStream(OS);
+      return Error::success();
+    }
+
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E)) {
+      sanitizeFields(Custom).serializeToStream(OS);
+      return Error::success();
+    }
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return STRING_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away.
+    // Walk forward with an unsigned index: no size_t -> int narrowing and no
+    // special case for an empty list.
+    for (size_t I = 1, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+    return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Ctxt: the test context whose EmittedJsons queue receives the entries.
+  JsonStreamDestination(bool ShouldSanitize, TestContext *Ctxt)
+      : ShouldSanitize(ShouldSanitize), CurrentContext(Ctxt) {}
+
+  // Serializes Entry to JSON and hands it to SendToBlackbox().
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's ID
+      return SendToBlackbox(json::Object{{"SessionId", Entry->SessionId}});
+    }
+
+    // Emit as-is when sanitization is off, or when there is nothing to
+    // sanitize for this type of data.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E))
+      return SendToBlackbox(E->serializeToJson());
+
+    // A checked dyn_cast<> replaces the former isa<> + dyn_cast<> pair.
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E))
+      return SendToBlackbox(sanitizeFields(Custom).serializeToJson());
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return JSON_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away.
+    // Walk forward with an unsigned index: no size_t -> int narrowing and no
+    // special case for an empty list.
+    for (size_t I = 0, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    CurrentContext->EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+  bool ShouldSanitize;
+  TestContext *CurrentContext;
+};
+
+// Custom vendor-defined Telemeter that has additional data-collection point.
+class TestTelemeter : public Telemeter {
+public:
+  TestTelemeter(std::string SessionId) : Uuid(SessionId), Counter(0) {}
+
+  static std::unique_ptr<TestTelemeter>
+  createInstance(Config *config, TestContext *CurrentContext) {
+    if (!config->EnableTelemetry)
+      return nullptr;
+    CurrentContext->ExpectedUuid = nextUuid();
+    std::unique_ptr<TestTelemeter> Telemeter =
+        std::make_unique<TestTelemeter>(CurrentContext->ExpectedUuid);
+    // Set up Destination based on the given config.
+    for (const std::string &Dest : config->AdditionalDestinations) {
+      // The destination(s) are ALSO defined by vendor, so it should understand
+      // what the name of each destination signifies.
+      if (llvm::StringRef(Dest) == JSON_DEST) {
+        Telemeter->addDestination(
+            std::make_unique<vendor_code::JsonStreamDestination>(
+                CurrentContext->SanitizeData, CurrentContext));
+      } else if (llvm::StringRef(Dest) == STRING_DEST) {
+        Telemeter->addDestination(
+            std::make_unique<vendor_code::StringDestination>(
+                CurrentContext->SanitizeData, CurrentContext->Buffer));
+      } else {
+        llvm_unreachable(
+            llvm::Twine("unknown destination: ", Dest).str().c_str());
+      }
+    }
+    Telemeter->CurrentContext = CurrentContext;
----------------
labath wrote:

pass this as a constructor argument?

https://github.com/llvm/llvm-project/pull/102323


More information about the llvm-commits mailing list