[llvm] [llvm]Add a simple Telemetry framework (PR #102323)
Vy Nguyen via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 18:10:45 PDT 2024
================
@@ -0,0 +1,707 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <ctime>
+#include <vector>
+
+// Testing parameters.
+// These are set by each test to force certain outcomes.
+// Since the tests may run in parallel, each test will have
+// its own TestContext populated.
+struct TestContext {
+  // Whether there should be an Exit error and, if so, the exit
+  // message/description the test expects.
+  bool HasExitError{false};
+  std::string ExitMsg;
+
+  // Whether there is a vendor-provided config for Telemetry.
+  bool HasVendorConfig{false};
+
+  // Whether the data should be sanitized.
+  bool SanitizeData{false};
+
+  // These two fields store the data emitted by the framework so that the
+  // tests can verify it later.
+  std::string Buffer;
+  std::vector<llvm::json::Object> EmittedJsons;
+
+  // The Uuid the fake tool is expected to generate.
+  std::string ExpectedUuid;
+};
+
+// Points at the TestContext of the running test; each test body sets it.
+static thread_local TestContext *CurrentContext = nullptr;
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generates a unique (but deterministic) "uuid" for testing purposes.
+static std::string nextUuid() {
+  // Function-local counter: each call hands out the current value and then
+  // advances it by one, starting from 1111.
+  static std::atomic<int> Seed = 1111;
+  const int Current = Seed.fetch_add(1, std::memory_order_acquire);
+  return std::to_string(Current);
+}
+
+// Kind tags for the vendor-defined entry types. The values are written as
+// bit patterns because VendorCommonTelemetryInfo::classof tests them with a
+// bitmask.
+struct VendorEntryKind {
+  static constexpr KindType VendorCommon = 0b010101000; // 168
+  static constexpr KindType Startup = 0b010101001;      // 169
+  static constexpr KindType Exit = 0b010101010;         // 170
+};
+
+// Describes the exit signal of an event.
+// This is used by TelemetryInfo below.
+struct ExitDescription {
+  // Exit code of the event. Defaults to 0 so a default-constructed
+  // description never carries an indeterminate value.
+  int ExitCode = 0;
+  // Free-form description accompanying the exit code.
+  std::string Description;
+};
+
+// Defines a convenient type for timestamp of various events.
+// This is used by the EventStats below.
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+// Timing (and possibly, later on, memory) statistics of an event.
+struct EventStats {
+  // REQUIRED: when the event started.
+  SteadyTimePoint Start;
+  // OPTIONAL: when the event ended - may be empty if not meaningful.
+  std::optional<SteadyTimePoint> End;
+  // TBD: memory stats could be added here too.
+
+  // Leaves Start default-constructed and End empty.
+  EventStats() = default;
+  // Records only the start time; End stays empty.
+  EventStats(SteadyTimePoint Start) : Start{Start} {}
+  // Records both the start and the end time.
+  EventStats(SteadyTimePoint Start, SteadyTimePoint End)
+      : Start{Start}, End{End} {}
+};
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization methods.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+  // LLVM-style RTTI hook. Returns true when every bit of
+  // VendorEntryKind::VendorCommon is set in T's kind, so subclasses of this
+  // type are accepted as well. A null pointer is never a match.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return (T->getKind() & VendorEntryKind::VendorCommon) ==
+           VendorEntryKind::VendorCommon;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::VendorCommon; }
+
+  // Writes a textual form of the entry to OS; every concrete entry type
+  // must provide it.
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+
+  std::optional<ExitDescription> ExitDesc;
+  EventStats Stats;
+  // Zero-initialized: the subclasses' copy constructors read this field, so
+  // it must not be left indeterminate on a default-constructed entry.
+  size_t Counter = 0;
+};
+
+// Entry type tagged with VendorEntryKind::Startup; carries one extra message.
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  // Copies the fields listed below from E, one by one.
+  StartupEvent(const StartupEvent &E) : MagicStartupMsg(E.MagicStartupMsg) {
+    SessionId = E.SessionId;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+  }
+
+  // LLVM-style RTTI hook: matches only entries whose kind is exactly Startup.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::Startup; }
+
+  // Emits one "Key:Value" line each for the session id and the startup
+  // message.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n"
+       << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  // Wraps the session id and the startup message under a "Startup" key.
+  json::Object serializeToJson() const override {
+    return json::Object{
+        {"Startup",
+         {{"SessionId", SessionId}, {"MagicStartupMsg", MagicStartupMsg}}},
+    };
+  }
+};
+
+// Entry type tagged with VendorEntryKind::Exit; carries one extra message
+// and reports the exit code when an ExitDescription is present.
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Provide a copy ctor because we may need to make a copy
+  // before sanitizing the Entry.
+  ExitEvent(const ExitEvent &E) {
+    SessionId = E.SessionId;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+
+    MagicExitMsg = E.MagicExitMsg;
+  }
+
+  // LLVM-style RTTI hook: matches only entries whose kind is exactly Exit.
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == VendorEntryKind::Exit;
+  }
+
+  // Declared with KindType (rather than a raw `unsigned`) so the override
+  // is spelled the same way as in the sibling entry types.
+  KindType getKind() const override { return VendorEntryKind::Exit; }
+
+  // Emits "Key:Value" lines: the session id, the exit code (only when
+  // ExitDesc is set) and the exit message.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    if (ExitDesc.has_value())
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  // Returns an object with a single "Exit" key mapped to a json::Array built
+  // from the session id and the exit message, with an ExitCode element
+  // appended when ExitDesc is set.
+  json::Object serializeToJson() const override {
+    json::Array Fields = json::Array{
+        {"SessionId", SessionId},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc.has_value())
+      Fields.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object{
+        {"Exit", std::move(Fields)},
+    };
+  }
+};
+
+// Entry type carrying an ordered list of free-form messages.
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  // Copies the fields listed below from E, one by one.
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) : Msgs(E.Msgs) {
+    SessionId = E.SessionId;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+  }
+
+  // Emits the session id followed by one "MSG_<index>:<text>" line per
+  // message, with indices starting at 0.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    for (int Idx = 0, End = static_cast<int>(Msgs.size()); Idx < End; ++Idx)
+      OS << "MSG_" << Idx << ":" << Msgs[Idx] << "\n";
+  }
+
+  // Builds an object holding the session id plus one "MSG_<index>" key per
+  // message, and wraps it under a "Midpoint" key.
+  json::Object serializeToJson() const override {
+    json::Object Payload;
+    Payload.try_emplace("SessionId", SessionId);
+    for (int Idx = 0, End = static_cast<int>(Msgs.size()); Idx < End; ++Idx)
+      Payload.try_emplace(("MSG_" + llvm::Twine(Idx)).str(), Msgs[Idx]);
+
+    return json::Object{{"Midpoint", std::move(Payload)}};
+  }
+};
+
+// The following classes demonstrate how downstream code can
+// define one or more custom Destination(s) to handle
+// Telemetry data differently, specifically:
+// + which data to send (full set or sanitized)
+// + where to send the data
+// + in what form
+
+static constexpr llvm::StringLiteral STRING_DEST("STRING");
+static constexpr llvm::StringLiteral JSON_DEST("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Buf: the string that receives the serialized entries.
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+  // Serializes Entry to the output string and always returns success.
+  // Entries that are not vendor entries are reduced to their session id.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    // A single dyn_cast both tests and converts; no separate isa<> is needed.
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "SessionId:" << Entry->SessionId << "\n";
+      return Error::success();
+    }
+
+    // Startup/Exit entries have nothing to sanitize, so they are emitted
+    // as-is, exactly like everything else when sanitizing is turned off.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+      E->serializeToStream(OS);
+      return Error::success();
+    }
+
+    if (isa<CustomTelemetryEvent>(E)) {
+      // The type was just checked, so cast<> (not dyn_cast<>) is the right
+      // tool here.
+      CustomTelemetryEvent Sanitized =
+          sanitizeFields(cast<CustomTelemetryEvent>(E));
+      Sanitized.serializeToStream(OS);
+      return Error::success();
+    }
+
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return STRING_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 1, E = Sanitized.Msgs.size(); I < E; I += 2)
+      Sanitized.Msgs[I] = "";
+    return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  JsonStreamDestination(bool ShouldSanitize) : ShouldSanitize(ShouldSanitize) {}
+
+  // Converts Entry to JSON and hands it to SendToBlackbox, returning that
+  // call's result. Entries that are not vendor entries are reduced to their
+  // session id.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's ID
+      return SendToBlackbox(json::Object{{"SessionId", Entry->SessionId}});
+    }
+
+    // Startup/Exit entries have nothing to sanitize, so they are emitted
+    // as-is, exactly like everything else when sanitizing is turned off.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E))
+      return SendToBlackbox(E->serializeToJson());
+
+    if (isa<CustomTelemetryEvent>(E)) {
+      // The type was just checked, so cast<> (not dyn_cast<>) is the right
+      // tool here.
+      CustomTelemetryEvent Sanitized =
+          sanitizeFields(cast<CustomTelemetryEvent>(E));
+      return SendToBlackbox(Sanitized.serializeToJson());
+    }
+
+    // Every handled case has returned above, so no fallback error return is
+    // needed after this point.
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return JSON_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 0, E = Sanitized.Msgs.size(); I < E; I += 2)
+      Sanitized.Msgs[I] = "";
+
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    CurrentContext->EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+  bool ShouldSanitize;
+};
+
+// Custom vendor-defined Telemeter that has additional data-collection point.
+class TestTelemeter : public Telemeter {
+public:
+ TestTelemeter(std::string SessionId) : Uuid(SessionId), Counter(0) {}
+
+ static std::unique_ptr<TestTelemeter> createInstance(Config *config) {
+ if (!config->EnableTelemetry)
+ return nullptr;
+ CurrentContext->ExpectedUuid = nextUuid();
+ std::unique_ptr<TestTelemeter> Telemeter =
+ std::make_unique<TestTelemeter>(CurrentContext->ExpectedUuid);
+ // Set up Destination based on the given config.
+ for (const std::string &Dest : config->AdditionalDestinations) {
+ // The destination(s) are ALSO defined by vendor, so it should understand
+ // what the name of each destination signifies.
+ if (Dest == JSON_DEST.str()) {
----------------
oontvoo wrote:
done
https://github.com/llvm/llvm-project/pull/102323
More information about the llvm-commits
mailing list