[llvm] [llvm]Add a simple Telemetry framework (PR #102323)
Vy Nguyen via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 06:35:06 PDT 2024
================
@@ -0,0 +1,687 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <chrono>
+#include <ctime>
+#include <vector>
+
+// Per-test parameters and captured output.
+// Each test fills in its own TestContext to force a particular outcome.
+// Since the tests may run in parallel, a context is never shared between
+// tests.
+struct TestContext {
+  // Whether the fake tool should report an error at exit and, if so, the
+  // message/description that goes with it.
+  bool HasExitError = false;
+  std::string ExitMsg;
+
+  // Whether a vendor-provided Telemetry config is present.
+  bool HasVendorConfig = false;
+
+  // Whether the destinations should sanitize the data before emitting it.
+  bool SanitizeData = false;
+
+  // These two fields store the data emitted by the framework so that the
+  // tests can verify it later.
+  std::string Buffer;
+  std::vector<llvm::json::Object> EmittedJsons;
+
+  // The Uuid that the fake tool is expected to have generated.
+  std::string ExpectedUuid;
+};
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generates a unique (but deterministic) "uuid" for testing purposes.
+// The first call returns "1111"; each later call returns the next integer.
+static std::string nextUuid() {
+  // A plain counter only needs the increment itself to be atomic; it does not
+  // order any other memory accesses, so relaxed ordering is sufficient.
+  static std::atomic<int> Seed{1111};
+  return std::to_string(Seed.fetch_add(1, std::memory_order_relaxed));
+}
+
+// Kind identifiers for the vendor-defined telemetry entries.
+// Startup and Exit keep every bit of VendorCommon set; that is what the
+// bit-mask test in VendorCommonTelemetryInfo::classof relies on.
+struct VendorEntryKind {
+  static constexpr KindType VendorCommon = 168; // 0b010101000
+  static constexpr KindType Startup = 169;      // 0b010101001
+  static constexpr KindType Exit = 170;         // 0b010101010
+};
+
+// Describes the exit signal of an event.
+// This is stored (optionally) in VendorCommonTelemetryInfo below.
+struct ExitDescription {
+ // Exit code reported for the event.
+ int ExitCode;
+ // Human-readable description that accompanies the exit code.
+ std::string Description;
+};
+
+// Defines a convenient type for the timestamps of various events, measured
+// on std::chrono::steady_clock.
+// This is used by the EventStats below.
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+// Timing (and, possibly later, memory) statistics recorded for an event.
+struct EventStats {
+  // REQUIRED: when the event started.
+  SteadyTimePoint Start;
+  // OPTIONAL: when the event ended; left empty if an end time is not
+  // meaningful for the event.
+  std::optional<SteadyTimePoint> End;
+  // TBD: could add some memory stats here too?
+
+  EventStats() = default;
+  EventStats(SteadyTimePoint Begin) : Start{Begin} {}
+  EventStats(SteadyTimePoint Begin, SteadyTimePoint Finish)
+      : Start{Begin}, End{Finish} {}
+};
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization methods.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+  // LLVM-style RTTI hook. Accepts every entry whose kind has all of the
+  // VendorCommon bits set, which is how subclasses of this struct are
+  // accepted as well.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && (T->getKind() & VendorEntryKind::VendorCommon) ==
+                               VendorEntryKind::VendorCommon;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::VendorCommon; }
+
+  // Writes a plain-text rendering of this entry to OS.
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+
+  // Optional exit code/description; empty unless the producer fills it in.
+  std::optional<ExitDescription> ExitDesc;
+  EventStats Stats;
+  // Zero-initialized so that an entry which is copied or read before a value
+  // has been assigned never exposes an indeterminate value.
+  size_t Counter = 0;
+};
+
+// Entry describing the startup of the tool.
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  StartupEvent(const StartupEvent &E) = default;
+
+  // LLVM-style RTTI hook: matches entries whose kind is exactly Startup.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getKind() const override { return VendorEntryKind::Startup; }
+
+  // Writes the session id and the startup message, one "Key:Value" per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n"
+       << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  // Returns an object with a single "Startup" key that maps to an array of
+  // [name, value] pairs.
+  json::Object serializeToJson() const override {
+    json::Array Fields{
+        {"SessionId", SessionId},
+        {"MagicStartupMsg", MagicStartupMsg},
+    };
+    return json::Object{{"Startup", std::move(Fields)}};
+  }
+};
+
+// Entry describing the exit of the tool.
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Provide a copy ctor because we may need to make a copy
+  // before sanitizing the Entry.
+  ExitEvent(const ExitEvent &E) = default;
+
+  // LLVM-style RTTI hook: matches entries whose kind is exactly Exit.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::Exit;
+  }
+
+  // Declared with KindType, like the other getKind() overrides in this file,
+  // rather than a hard-coded `unsigned`.
+  KindType getKind() const override { return VendorEntryKind::Exit; }
+
+  // Writes the session id, the exit code (only when ExitDesc is set) and the
+  // exit message, one "Key:Value" per line.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    if (ExitDesc)
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  // Returns an object with a single "Exit" key that maps to an array of
+  // [name, value] pairs; the "ExitCode" pair is appended only when ExitDesc
+  // is set.
+  json::Object serializeToJson() const override {
+    json::Array Fields{
+        {"SessionId", SessionId},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc)
+      Fields.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object{{"Exit", std::move(Fields)}};
+  }
+};
+
+// Vendor-defined entry that carries a list of free-form messages.
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) = default;
+
+  // LLVM-style RTTI hook. This struct keeps the inherited VendorCommon kind,
+  // so without a classof of its own it would inherit the bit-mask test of the
+  // base, and isa/dyn_cast<CustomTelemetryEvent> would also accept
+  // StartupEvent and ExitEvent. Matching the kind exactly rules those out.
+  // NOTE(review): any other subclass that also keeps the VendorCommon kind
+  // would still be accepted - give this event a kind of its own if such a
+  // subclass is ever added.
+  static bool classof(const TelemetryInfo *T) {
+    return T != nullptr && T->getKind() == VendorEntryKind::VendorCommon;
+  }
+
+  // Writes the session id followed by one "MSG_<index>:<message>" line per
+  // message.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "SessionId:" << SessionId << "\n";
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      OS << "MSG_" << I << ":" << M << "\n";
+      ++I;
+    }
+  }
+
+  // Returns an object with a single "Midpoint" key that maps to an object
+  // holding the session id and one "MSG_<index>" member per message.
+  json::Object serializeToJson() const override {
+    json::Object Inner;
+    Inner.try_emplace("SessionId", SessionId);
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+      ++I;
+    }
+
+    return json::Object{{"Midpoint", std::move(Inner)}};
+  }
+};
+
+// The following classes demonstrate how downstream code can
+// define one or more custom Destination(s) to handle
+// Telemetry data differently, specifically:
+// + which data to send (fullset or sanitized)
+// + where to send the data
+// + in what form
+
+static constexpr llvm::StringLiteral STRING_DEST("STRING");
+static constexpr llvm::StringLiteral JSON_DEST("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Buf: the string that receives the serialized entries; it is held by
+  // reference, so it must outlive this object.
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+  // Serializes Entry as text into the string given at ctor. Always returns
+  // success.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "SessionId:" << Entry->SessionId << "\n";
+      return Error::success();
+    }
+
+    // Either sanitizing is off, or there is nothing to sanitize for this type
+    // of data, so keep as-is.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+      E->serializeToStream(OS);
+      return Error::success();
+    }
+
+    // A single dyn_cast both tests the type and yields the pointer, so the
+    // pointer handed to sanitizeFields is never null.
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E)) {
+      sanitizeFields(Custom).serializeToStream(OS);
+      return Error::success();
+    }
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return STRING_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 1, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+    return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public Destination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Ctxt: the test context whose EmittedJsons vector receives the entries.
+  JsonStreamDestination(bool ShouldSanitize, TestContext *Ctxt)
+      : ShouldSanitize(ShouldSanitize), CurrentContext(Ctxt) {}
+
+  // Serializes Entry as JSON and queues it on the test context.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    // Unfamiliar entries, just send the entry's ID
+    if (!E)
+      return SendToBlackbox(json::Object{{"SessionId", Entry->SessionId}});
+
+    // Either sanitizing is off, or there is nothing to sanitize for this type
+    // of data, so keep as-is.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E))
+      return SendToBlackbox(E->serializeToJson());
+
+    // A single dyn_cast both tests the type and yields the pointer, so the
+    // pointer handed to sanitizeFields is never null.
+    if (const auto *Custom = dyn_cast<CustomTelemetryEvent>(E))
+      return SendToBlackbox(sanitizeFields(Custom).serializeToJson());
+
+    llvm_unreachable("unexpected type");
+  }
+
+  llvm::StringLiteral name() const override { return JSON_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 0, N = Sanitized.Msgs.size(); I < N; I += 2)
+      Sanitized.Msgs[I].clear();
+
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    CurrentContext->EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+
+  bool ShouldSanitize;
+  TestContext *CurrentContext;
+};
+
+// Custom vendor-defined Telemeter that has an additional data-collection
+// point (atMidpoint) on top of the startup/exit hooks.
+class TestTelemeter : public Telemeter {
+public:
+  // SessionId: the id stamped on every entry created by
+  // makeDefaultTelemetryInfo().
+  TestTelemeter(std::string SessionId)
+      : Uuid(std::move(SessionId)), Counter(0) {}
+
+  // Creates a Telemeter wired up with the destinations named in
+  // config->AdditionalDestinations. Returns nullptr when telemetry is
+  // disabled in the config. Otherwise the generated session id is also
+  // recorded in CurrentContext->ExpectedUuid.
+  static std::unique_ptr<TestTelemeter>
+  createInstance(Config *config, TestContext *CurrentContext) {
+    if (!config->EnableTelemetry)
+      return nullptr;
+    CurrentContext->ExpectedUuid = nextUuid();
+    // Not named `Telemeter`, which would shadow the base class name here.
+    auto Result =
+        std::make_unique<TestTelemeter>(CurrentContext->ExpectedUuid);
+    // Set up Destination based on the given config.
+    for (const std::string &Dest : config->AdditionalDestinations) {
+      // The destination(s) are ALSO defined by vendor, so it should understand
+      // what the name of each destination signifies.
+      if (llvm::StringRef(Dest) == JSON_DEST) {
+        Result->addDestination(
+            std::make_unique<vendor_code::JsonStreamDestination>(
+                CurrentContext->SanitizeData, CurrentContext));
+      } else if (llvm::StringRef(Dest) == STRING_DEST) {
+        Result->addDestination(
+            std::make_unique<vendor_code::StringDestination>(
+                CurrentContext->SanitizeData, CurrentContext->Buffer));
+      } else {
+        llvm_unreachable(
+            llvm::Twine("unknown destination: ", Dest).str().c_str());
+      }
+    }
+    Result->CurrentContext = CurrentContext;
+    return Result;
+  }
+
+  // Remembers the tool name, lets the vendor decorate a StartupEvent and
+  // forwards the entry to every destination.
+  void atStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    ToolName = ToolPath.str();
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *S = dyn_cast<StartupEvent>(Entry))
+      S->MagicStartupMsg = llvm::Twine("Startup_", ToolPath).str();
+
+    emitToDestinations(Entry);
+  }
+
+  // Lets the vendor decorate an ExitEvent and forwards the entry to every
+  // destination. ToolPath must match the name given to atStartup().
+  void atExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    // Ensure we're shutting down the same tool we started with.
+    if (ToolPath != ToolName) {
+      std::string Str;
+      raw_string_ostream OS(Str);
+      OS << "Expected tool with name " << ToolName << ", but got " << ToolPath;
+      llvm_unreachable(Str.c_str());
+    }
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *E = dyn_cast<ExitEvent>(Entry))
+      E->MagicExitMsg = llvm::Twine("Exit_", ToolPath).str();
+
+    emitToDestinations(Entry);
+  }
+
+  void addDestination(std::unique_ptr<Destination> Dest) override {
+    Destinations.push_back(std::move(Dest));
+  }
+
+  // Additional, vendor-only collection point: appends three fixed messages
+  // to a CustomTelemetryEvent and forwards the entry to every destination.
+  void atMidpoint(TelemetryInfo *Entry) {
+    // The custom Telemeter can record and send additional data.
+    if (auto *C = dyn_cast<CustomTelemetryEvent>(Entry)) {
+      C->Msgs.push_back("Two");
+      C->Msgs.push_back("Deux");
+      C->Msgs.push_back("Zwei");
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  const std::string &getUuid() const { return Uuid; }
+
+  ~TestTelemeter() = default;
+
+  // Returns a default-constructed T stamped with this Telemeter's session id
+  // and the next value of its entry counter. T must have SessionId and
+  // Counter members.
+  template <typename T> T makeDefaultTelemetryInfo() {
+    T Ret;
+    Ret.SessionId = Uuid;
+    Ret.Counter = Counter++;
+    return Ret;
+  }
+
+  TestContext *CurrentContext = nullptr;
+
+private:
+  // Sends Entry to every registered destination. A failure in one
+  // destination is logged and does not stop the remaining ones.
+  void emitToDestinations(TelemetryInfo *Entry) {
+    for (const auto &Dest : Destinations) {
+      // A failing llvm::Error has to be consumed explicitly; merely testing
+      // it leaves it unhandled. Log it and move on.
+      if (llvm::Error Err = Dest->emitEntry(Entry))
+        logAllUnhandledErrors(std::move(Err), llvm::errs(),
+                              "TestTelemeter: ");
+    }
+  }
+
+  const std::string Uuid;
+  size_t Counter;
+  std::string ToolName;
+  std::vector<std::unique_ptr<Destination>> Destinations;
+};
+
+// Pretend to be a "weakly" defined vendor-specific function.
+// The only thing this test vendor does is switch telemetry on in the given
+// config.
+void ApplyVendorSpecificConfigs(Config *config) {
+ config->EnableTelemetry = true;
+}
+
+} // namespace vendor_code
+} // namespace telemetry
+} // namespace llvm
+
+namespace {
+
+// Placeholder for configuration shared by every user of the upstream tool.
+// It intentionally leaves the given config untouched.
+void ApplyCommonConfig(llvm::telemetry::Config *config) {
+ // Any shareable configs for the upstream tool can go here.
+ // .....
+}
+
+// Builds the Config used by the tests: telemetry starts out disabled, the
+// common config is applied, and the vendor config is applied on top only
+// when the test context says one is present.
+std::shared_ptr<llvm::telemetry::Config>
+GetTelemetryConfig(TestContext *CurrentContext) {
+  namespace tel = llvm::telemetry;
+
+  // Telemetry is disabled by default.
+  // The vendor can enable in their config.
+  auto Cfg = std::make_shared<tel::Config>();
+  Cfg->EnableTelemetry = false;
+
+  ApplyCommonConfig(Cfg.get());
+
+  // Apply vendor specific config, if present.
+  // In principle, this would be a build-time param, configured by the vendor.
+  // Eg:
+  //
+  // #ifdef HAS_VENDOR_TELEMETRY_CONFIG
+  // llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(config.get());
+  // #endif
+  //
+  // But for unit testing, we use the testing params defined at the top.
+  if (CurrentContext->HasVendorConfig)
+    tel::vendor_code::ApplyVendorSpecificConfigs(Cfg.get());
+
+  return Cfg;
+}
+
+using namespace llvm;
+using namespace llvm::telemetry;
+
+// For deterministic tests, predefine certain important time-points
+// rather than using now().
+//
+// Preset StartTime to the EPOCH of std::chrono::steady_clock.
+auto StartTime = std::chrono::time_point<std::chrono::steady_clock>{};
+// Pretend the time it takes for the tool's initialization is EPOCH + 5
+// milliseconds
+auto InitCompleteTime = StartTime + std::chrono::milliseconds(5);
+// Preset MidPointTime to EPOCH + 10 milliseconds, and pretend the midpoint
+// work takes 5 milliseconds to complete.
+auto MidPointTime = StartTime + std::chrono::milliseconds(10);
+auto MidPointCompleteTime = MidPointTime + std::chrono::milliseconds(5);
+// Preset ExitTime to EPOCH + 20 milliseconds
+auto ExitTime = StartTime + std::chrono::milliseconds(20);
+// Pretend the time it takes to complete tearing down the tool is 10
+// milliseconds.
+auto ExitCompleteTime = ExitTime + std::chrono::milliseconds(10);
+
+// Simulates the tool's startup: builds a StartupEvent stamped with the preset
+// start / init-complete times and hands it to the Telemeter.
+void AtToolStart(std::string ToolName, vendor_code::TestTelemeter *T) {
+  auto Event = T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
+  Event.Stats = vendor_code::EventStats(StartTime, InitCompleteTime);
+  T->atStartup(ToolName, &Event);
+}
+
+// Simulates the tool's exit: builds an ExitEvent stamped with the preset
+// exit / exit-complete times, attaches an exit description when the test
+// context asks for an exit error, and hands the event to the Telemeter.
+void AtToolExit(std::string ToolName, vendor_code::TestTelemeter *T) {
+  auto Event = T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
+  Event.Stats = vendor_code::EventStats(ExitTime, ExitCompleteTime);
+
+  const TestContext *Ctxt = T->CurrentContext;
+  if (Ctxt->HasExitError)
+    Event.ExitDesc = vendor_code::ExitDescription{1, Ctxt->ExitMsg};
+
+  T->atExit(ToolName, &Event);
+}
+
+// Simulates a vendor-defined midpoint: builds a CustomTelemetryEvent stamped
+// with the preset midpoint times and hands it to the Telemeter.
+void AtToolMidPoint(vendor_code::TestTelemeter *T) {
+  auto Event = T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
+  Event.Stats = vendor_code::EventStats(MidPointTime, MidPointCompleteTime);
+  T->atMidpoint(&Event);
+}
+
+// Without vendor's implementation, telemetry is not enabled by default.
+TEST(TelemetryTest, TelemetryDefault) {
+  // No vendor config is present, so nothing switches telemetry on.
+  TestContext Context;
+  Context.HasVendorConfig = false;
+
+  const auto Cfg = GetTelemetryConfig(&Context);
+  const auto Tool =
+      vendor_code::TestTelemeter::createInstance(Cfg.get(), &Context);
+
+  // A disabled config must not produce a Telemeter.
+  EXPECT_EQ(nullptr, Tool.get());
+}
+
+TEST(TelemetryTest, TelemetryEnabled) {
+ const std::string ToolName = "TelemetryTest";
+
+ // Preset some test params.
+ TestContext Context;
+ Context.HasVendorConfig = true;
+ Context.SanitizeData = false;
+ Context.Buffer.clear();
+ Context.EmittedJsons.clear();
+ TestContext *CurrentContext = &Context;
+
+ std::shared_ptr<llvm::telemetry::Config> Config =
+ GetTelemetryConfig(CurrentContext);
+
+ // Add some destinations
+ Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST.str());
+ Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST.str());
+
+ auto Tool =
+ vendor_code::TestTelemeter::createInstance(Config.get(), CurrentContext);
+
+ AtToolStart(ToolName, Tool.get());
+ AtToolMidPoint(Tool.get());
+ AtToolExit(ToolName, Tool.get());
+
+ // Check that the Tool uses the expected UUID.
+ EXPECT_STREQ(Tool->getUuid().c_str(), CurrentContext->ExpectedUuid.c_str());
+
+ // Check that the StringDestination emitted properly
+ {
+ std::string ExpectedBuffer =
+ ("SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+ "MagicStartupMsg:Startup_" + llvm::Twine(ToolName) + "\n" +
+ "SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+ "MSG_0:Two\n" + "MSG_1:Deux\n" + "MSG_2:Zwei\n" +
+ "SessionId:" + llvm::Twine(CurrentContext->ExpectedUuid) + "\n" +
+ "MagicExitMsg:Exit_" + llvm::Twine(ToolName) + "\n")
+ .str();
+
+ EXPECT_STREQ(ExpectedBuffer.c_str(), CurrentContext->Buffer.c_str());
+ }
+
+ // Check that the JsonDestination emitted properly
+ {
+
+ // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
+ EXPECT_EQ(static_cast<size_t>(3), CurrentContext->EmittedJsons.size());
+
+ const json::Value *StartupEntry =
+ CurrentContext->EmittedJsons[0].get("Startup");
+ ASSERT_NE(StartupEntry, nullptr);
+ llvm::Expected<json::Value> ExpectedStartup = json::parse(
+ ("[[\"SessionId\",\"" + llvm::Twine(CurrentContext->ExpectedUuid) +
+ "\"],[\"MagicStartupMsg\",\"Startup_" + llvm::Twine(ToolName) + "\"]]")
+ .str());
+ ASSERT_TRUE((bool)ExpectedStartup);
+ EXPECT_EQ(ExpectedStartup.get(), *StartupEntry);
+
+ const json::Value *MidpointEntry =
+ CurrentContext->EmittedJsons[1].get("Midpoint");
+ ASSERT_NE(MidpointEntry, nullptr);
+ llvm::Expected<json::Value> ExpectedMidpoint =
+ json::parse(("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\","
+ "\"SessionId\":\"" +
+ llvm::Twine(CurrentContext->ExpectedUuid) + "\"}")
+ .str());
----------------
oontvoo wrote:
The string literal was (IMO) a bit easier and faster to see what the expected data should be
https://github.com/llvm/llvm-project/pull/102323
More information about the llvm-commits
mailing list