[llvm] [llvm]Add a simple Telemetry framework (PR #102323)
Vy Nguyen via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 06:31:54 PDT 2024
================
@@ -0,0 +1,707 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <ctime>
+#include <vector>
+
+// Testing parameters.
+// These are set by each test to force certain outcomes.
+// Since the tests may run in parallel, each test will have
+// its own TestContext populated.
+struct TestContext {
+ // Controlling whether there should be an Exit error (if so, what the
+ // expected exit message/description should be).
+ bool HasExitError = false;
+ std::string ExitMsg = "";
+
+ // Controlling whether there is a vendor-provided config for
+ // Telemetry.
+ bool HasVendorConfig = false;
+
+ // Controlling whether the data should be sanitized.
+ bool SanitizeData = false;
+
+ // These two fields data emitted by the framework for later
+ // verifications by the tests.
+ std::string Buffer = "";
+ std::vector<llvm::json::Object> EmittedJsons;
+
+ // The expected Uuid generated by the fake tool.
+ std::string ExpectedUuid = "";
+};
+
+// This is set by the test body.
+static thread_local TestContext *CurrentContext = nullptr;
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generate unique (but deterministic "uuid" for testing purposes).
+static std::string nextUuid() {
+ static std::atomic<int> seed = 1111;
+ return std::to_string(seed.fetch_add(1, std::memory_order_acquire));
+}
+
+struct VendorEntryKind {
+ static const KindType VendorCommon = 168; // 0b010101000
+ static const KindType Startup = 169; // 0b010101001
+ static const KindType Exit = 170; // 0b010101010
+};
+
+// Describes the exit signal of an event.
+// This is used by TelemetryInfo below.
+struct ExitDescription {
+ int ExitCode;
+ std::string Description;
+};
+
+// Defines a convenient type for timestamp of various events.
+// This is used by the EventStats below.
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+// Various time (and possibly memory) statistics of an event.
+struct EventStats {
+ // REQUIRED: Start time of an event
+ SteadyTimePoint Start;
+ // OPTIONAL: End time of an event - may be empty if not meaningful.
+ std::optional<SteadyTimePoint> End;
+ // TBD: could add some memory stats here too?
+
+ EventStats() = default;
+ EventStats(SteadyTimePoint Start) : Start(Start) {}
+ EventStats(SteadyTimePoint Start, SteadyTimePoint End)
+ : Start(Start), End(End) {}
+};
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization method.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+ static bool classof(const TelemetryInfo *T) {
+ if (T == nullptr)
+ return false;
+ // Subclasses of this is also acceptable.
+ return (T->getKind() & VendorEntryKind::VendorCommon) ==
+ VendorEntryKind::VendorCommon;
+ }
+
+ KindType getKind() const override { return VendorEntryKind::VendorCommon; }
+
+ virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+
+ std::optional<ExitDescription> ExitDesc;
+ EventStats Stats;
+ size_t Counter;
+};
+
+struct StartupEvent : public VendorCommonTelemetryInfo {
+ std::string MagicStartupMsg;
+
+ StartupEvent() = default;
+ StartupEvent(const StartupEvent &E) {
+ SessionId = E.SessionId;
+ Stats = E.Stats;
+ ExitDesc = E.ExitDesc;
+ Counter = E.Counter;
+
+ MagicStartupMsg = E.MagicStartupMsg;
+ }
+
+ static bool classof(const TelemetryInfo *T) {
+ if (T == nullptr)
+ return false;
+ return T->getKind() == VendorEntryKind::Startup;
+ }
+
+ KindType getKind() const override { return VendorEntryKind::Startup; }
+
+ void serializeToStream(llvm::raw_ostream &OS) const override {
+ OS << "SessionId:" << SessionId << "\n";
+ OS << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+ }
+
+ json::Object serializeToJson() const override {
+ return json::Object{
+ {"Startup",
+ {{"SessionId", SessionId}, {"MagicStartupMsg", MagicStartupMsg}}},
+ };
+ }
+};
+
+struct ExitEvent : public VendorCommonTelemetryInfo {
+ std::string MagicExitMsg;
+
+ ExitEvent() = default;
+ // Provide a copy ctor because we may need to make a copy
+ // before sanitizing the Entry.
+ ExitEvent(const ExitEvent &E) {
+ SessionId = E.SessionId;
+ Stats = E.Stats;
+ ExitDesc = E.ExitDesc;
+ Counter = E.Counter;
+
+ MagicExitMsg = E.MagicExitMsg;
+ }
+
+ static bool classof(const TelemetryInfo *T) {
+ if (T == nullptr)
+ return false;
+ return T->getKind() == VendorEntryKind::Exit;
+ }
+
+ unsigned getKind() const override { return VendorEntryKind::Exit; }
+
+ void serializeToStream(llvm::raw_ostream &OS) const override {
+ OS << "SessionId:" << SessionId << "\n";
+ if (ExitDesc.has_value())
+ OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+ OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+ }
+
+ json::Object serializeToJson() const override {
+ json::Array I = json::Array{
+ {"SessionId", SessionId},
+ {"MagicExitMsg", MagicExitMsg},
+ };
+ if (ExitDesc.has_value())
+ I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+ return json::Object{
+ {"Exit", std::move(I)},
+ };
+ }
+};
+
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+ std::vector<std::string> Msgs;
+
+ CustomTelemetryEvent() = default;
+ CustomTelemetryEvent(const CustomTelemetryEvent &E) {
+ SessionId = E.SessionId;
+ Stats = E.Stats;
+ ExitDesc = E.ExitDesc;
+ Counter = E.Counter;
+
+ Msgs = E.Msgs;
+ }
+
+ void serializeToStream(llvm::raw_ostream &OS) const override {
+ OS << "SessionId:" << SessionId << "\n";
+ int I = 0;
+ for (const std::string &M : Msgs) {
+ OS << "MSG_" << I << ":" << M << "\n";
+ ++I;
+ }
+ }
+
+ json::Object serializeToJson() const override {
+ json::Object Inner;
+ Inner.try_emplace("SessionId", SessionId);
+ int I = 0;
+ for (const std::string &M : Msgs) {
+ Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+ ++I;
+ }
+
+ return json::Object{{"Midpoint", std::move(Inner)}};
+ }
+};
+
+// The following classes demonstrate how downstream code can
+// define one or more custom Destination(s) to handle
+// Telemetry data differently, specifically:
+// + which data to send (fullset or sanitized)
+// + where to send the data
+// + in what form
+
+static constexpr llvm::StringLiteral STRING_DEST("STRING");
+static constexpr llvm::StringLiteral JSON_DEST("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public Destination {
+public:
+ // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+ // the full set.
+ StringDestination(bool ShouldSanitize, std::string &Buf)
+ : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+ Error emitEntry(const TelemetryInfo *Entry) override {
+ if (isa<VendorCommonTelemetryInfo>(Entry)) {
+ if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
+ if (ShouldSanitize) {
+ if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+ // There is nothing to sanitize for this type of data, so keep
+ // as-is.
+ E->serializeToStream(OS);
+ } else if (isa<CustomTelemetryEvent>(E)) {
+ auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
+ Sanitized.serializeToStream(OS);
+ } else {
+ llvm_unreachable("unexpected type");
+ }
+ } else {
+ E->serializeToStream(OS);
+ }
+ }
+ } else {
+ // Unfamiliar entries, just send the entry's UUID
+ OS << "SessionId:" << Entry->SessionId << "\n";
+ }
+ return Error::success();
+ }
+
+ llvm::StringLiteral name() const override { return STRING_DEST; }
+
+private:
+ // Returns a copy of the given entry, but with some fields sanitized.
+ CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+ CustomTelemetryEvent Sanitized(*Entry);
+ // Pretend that messages stored at ODD positions are "sensitive",
+ // hence need to be sanitized away.
+ int S = Sanitized.Msgs.size() - 1;
+ for (int I = S % 2 == 0 ? S - 1 : S; I >= 0; I -= 2)
+ Sanitized.Msgs[I] = "";
+ return Sanitized;
+ }
+
+ bool ShouldSanitize;
+ llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public Destination {
+public:
+ JsonStreamDestination(bool ShouldSanitize) : ShouldSanitize(ShouldSanitize) {}
+
+ Error emitEntry(const TelemetryInfo *Entry) override {
+ if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
+ if (ShouldSanitize) {
+ if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+ // There is nothing to sanitize for this type of data, so keep as-is.
+ return SendToBlackbox(E->serializeToJson());
+ } else if (isa<CustomTelemetryEvent>(E)) {
+ auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
+ return SendToBlackbox(Sanitized.serializeToJson());
+ } else {
+ llvm_unreachable("unexpected type");
+ }
+ } else {
+ return SendToBlackbox(E->serializeToJson());
+ }
+ } else {
+ // Unfamiliar entries, just send the entry's ID
+ return SendToBlackbox(json::Object{{"SessionId", Entry->SessionId}});
+ }
+ return make_error<StringError>("unhandled codepath in emitEntry",
+ inconvertibleErrorCode());
+ }
+
+ llvm::StringLiteral name() const override { return JSON_DEST; }
+
+private:
+ // Returns a copy of the given entry, but with some fields sanitized.
+ CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+ CustomTelemetryEvent Sanitized(*Entry);
+ // Pretend that messages stored at EVEN positions are "sensitive",
+ // hence need to be sanitized away.
+ int S = Sanitized.Msgs.size() - 1;
+ for (int I = S % 2 == 0 ? S : S - 1; I >= 0; I -= 2)
+ Sanitized.Msgs[I] = "";
+
+ return Sanitized;
+ }
+
+ llvm::Error SendToBlackbox(json::Object O) {
+ // Here is where the vendor-defined Destination class can
+ // send the data to some internal storage.
+ // For testing purposes, we just queue up the entries to
+ // the vector for validation.
+ CurrentContext->EmittedJsons.push_back(std::move(O));
+ return Error::success();
+ }
+ bool ShouldSanitize;
+};
+
+// Custom vendor-defined Telemeter that has additional data-collection point.
+class TestTelemeter : public Telemeter {
+public:
+ TestTelemeter(std::string SessionId) : Uuid(SessionId), Counter(0) {}
+
+ static std::unique_ptr<TestTelemeter> createInstance(Config *config) {
+ if (!config->EnableTelemetry)
+ return nullptr;
+ CurrentContext->ExpectedUuid = nextUuid();
+ std::unique_ptr<TestTelemeter> Telemeter =
+ std::make_unique<TestTelemeter>(CurrentContext->ExpectedUuid);
+ // Set up Destination based on the given config.
+ for (const std::string &Dest : config->AdditionalDestinations) {
+ // The destination(s) are ALSO defined by vendor, so it should understand
+ // what the name of each destination signifies.
+ if (Dest == JSON_DEST.str()) {
+ Telemeter->addDestination(new vendor_code::JsonStreamDestination(
+ CurrentContext->SanitizeData));
+ } else if (Dest == STRING_DEST.str()) {
+ Telemeter->addDestination(new vendor_code::StringDestination(
+ CurrentContext->SanitizeData, CurrentContext->Buffer));
+ } else {
+ llvm_unreachable(
+ llvm::Twine("unknown destination: ", Dest).str().c_str());
+ }
+ }
+ return Telemeter;
+ }
+
+ void atStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+ ToolName = ToolPath.str();
+
+ // The vendor can add additional stuff to the entry before logging.
+ if (auto *S = dyn_cast<StartupEvent>(Entry)) {
+ S->MagicStartupMsg = llvm::Twine("Startup_", ToolPath).str();
+ }
+ emitToDestinations(Entry);
+ }
+
+ void atExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+ // Ensure we're shutting down the same tool we started with.
+ if (ToolPath != ToolName) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "Expected tool with name" << ToolName << ", but got " << ToolPath;
+ llvm_unreachable(Str.c_str());
+ }
+
+ // The vendor can add additional stuff to the entry before logging.
+ if (auto *E = dyn_cast<ExitEvent>(Entry)) {
----------------
oontvoo wrote:
I think this goes back to the question of how much can different users actually share.
We have two camps here:
- One: nothing much (which is why we didn't actually need common API in LLVM to begin with)
- Two: some common behaviours (eg., startup, exit)
It seems the decision was to go with two. (I'm kind of in camp ONE since the beginning, but if we've decided to go with TWO, then there should at least some common methods in the base class. Otherwise it looks pointless).
WDYT?
https://github.com/llvm/llvm-project/pull/102323
More information about the llvm-commits
mailing list