[llvm] [llvm]Add a simple Telemetry framework (PR #102323)

Vy Nguyen via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 10:52:37 PDT 2024


https://github.com/oontvoo updated https://github.com/llvm/llvm-project/pull/102323

>From dbb8b15edb5e63f37a66dd15e67d46ee1b4f6c1b Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 11:08:44 -0400
Subject: [PATCH 01/10] [llvm][lib]Propose a simple Telemetry framework.

Objective:
  - Provide a common framework in LLVM for collecting various usage metrics
  - Characteristics:
      + Extensible and configurable by:
          * tools in LLVM that want to use it
          * vendors in their downstream codebase
          * tools users (as allowed by vendor)

Background:
The framework was originally proposed only for LLDB, but there were quite a few requests
that it should be moved to llvm/lib given telemetry is a common usage to a lot of tools,
not just LLDB.

See more details on the design and discussions here on the RFC: https://discourse.llvm.org/t/rfc-lldb-telemetry-metrics/64588/20?u=oontvoo
---
 llvm/include/llvm/Telemetry/Telemetry.h | 99 +++++++++++++++++++++++++
 llvm/lib/CMakeLists.txt                 |  1 +
 llvm/lib/Telemetry/CMakeLists.txt       |  6 ++
 llvm/lib/Telemetry/Telemetry.cpp        | 32 ++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 llvm/include/llvm/Telemetry/Telemetry.h
 create mode 100644 llvm/lib/Telemetry/CMakeLists.txt
 create mode 100644 llvm/lib/Telemetry/Telemetry.cpp

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
new file mode 100644
index 00000000000000..e34b228b219c10
--- /dev/null
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -0,0 +1,99 @@
+#ifndef LVM_TELEMETRY_TELEMETRY_H
+#define LVM_TELEMETRY_TELEMETRY_H
+
+#include <chrono>
+#include <ctime>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace telemetry {
+
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+struct TelemetryConfig {
+  // If true, telemetry will be enabled.
+  bool enable_telemetry;
+
+  // Additional destinations to send the logged entries.
+  // Could be stdout, stderr, or some local paths.
+  // Note: these are destinations are __in addition to__ whatever the default
+  // destination(s) are, as implemented by vendors.
+  std::vector<std::string> additional_destinations;
+};
+
+struct TelemetryEventStats {
+  // REQUIRED: Start time of event
+  SteadyTimePoint m_start;
+  // OPTIONAL: End time of event - may be empty if not meaningful.
+  std::optional<SteadyTimePoint> m_end;
+  // TBD: could add some memory stats here too?
+
+  TelemetryEventStats() = default;
+  TelemetryEventStats(SteadyTimePoint start) : m_start(start) {}
+  TelemetryEventStats(SteadyTimePoint start, SteadyTimePoint end)
+      : m_start(start), m_end(end) {}
+
+  std::string ToString() const;
+};
+
+struct ExitDescription {
+  int exit_code;
+  std::string description;
+
+  std::string ToString() const;
+};
+
+// The base class contains the basic set of data.
+// Downstream implementations can add more fields as needed.
+struct TelemetryInfo {
+  // A "session" corresponds to every time the tool starts.
+  // All entries emitted for the same session will have
+  // the same session_uuid
+  std::string session_uuid;
+
+  TelemetryEventStats stats;
+
+  std::optional<ExitDescription> exit_description;
+
+  // Counting number of entries.
+  // (For each set of entries with the same session_uuid, this value should
+  // be unique for each entry)
+  size_t counter;
+
+  TelemetryInfo() = default;
+  ~TelemetryInfo() = default;
+  virtual std::string ToString() const;
+};
+
+// Where/how to send the telemetry entries.
+class TelemetryDestination {
+public:
+  virtual ~TelemetryDestination() = default;
+  virtual Error EmitEntry(const TelemetryInfo *entry) = 0;
+  virtual std::string name() const = 0;
+};
+
+class Telemeter {
+public:
+  virtual ~Telemeter() = default;
+
+  // Invoked upon tool startup
+  virtual void LogStartup(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+
+  // Invoked upon tool exit.
+  virtual void LogExit(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+
+  virtual void AddDestination(TelemetryDestination *destination) = 0;
+};
+
+} // namespace telemetry
+} // namespace llvm
+
+#endif // LVM_TELEMETRY_TELEMETRY_H
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index 638c3bd6f90f53..1d2fb329226484 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(ProfileData)
 add_subdirectory(Passes)
 add_subdirectory(TargetParser)
 add_subdirectory(TextAPI)
+add_subdirectory(Telemetry)
 add_subdirectory(ToolDrivers)
 add_subdirectory(XRay)
 if (LLVM_INCLUDE_TESTS)
diff --git a/llvm/lib/Telemetry/CMakeLists.txt b/llvm/lib/Telemetry/CMakeLists.txt
new file mode 100644
index 00000000000000..8208bdadb05e94
--- /dev/null
+++ b/llvm/lib/Telemetry/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMTelemetry
+  Telemetry.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  "${LLVM_MAIN_INCLUDE_DIR}/llvm/Telemetry"
+)
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
new file mode 100644
index 00000000000000..f7100685ee2d2b
--- /dev/null
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -0,0 +1,32 @@
+#include "llvm/Telemetry/Telemetry.h"
+
+namespace llvm {
+namespace telemetry {
+
+std::string TelemetryEventStats::ToString() const {
+  std::string result;
+  llvm::raw_string_ostream os(result);
+  os << "start_timestamp: " << m_start.time_since_epoch().count()
+     << ", end_timestamp: "
+     << (m_end.has_value() ? std::to_string(m_end->time_since_epoch().count())
+                           : "<NONE>");
+  return result;
+}
+
+std::string ExitDescription::ToString() const {
+  return "exit_code: " + std::to_string(exit_code) +
+         ", description: " + description + "\n";
+}
+
+std::string TelemetryInfo::ToString() const {
+  return "[TelemetryInfo]\n" + ("  session_uuid:" + session_uuid + "\n") +
+         ("  stats: " + stats.ToString() + "\n") +
+         ("  exit_description: " +
+          (exit_description.has_value() ? exit_description->ToString()
+                                        : "<NONE>") +
+          "\n") +
+         ("  counter: " + std::to_string(counter) + "\n");
+}
+
+} // namespace telemetry
+} // namespace llvm
\ No newline at end of file

>From 6e43f67c9f0412fbf0f4a16f2be68daa40d4b6f4 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 13:21:32 -0400
Subject: [PATCH 02/10] add header

---
 llvm/include/llvm/Telemetry/Telemetry.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index e34b228b219c10..fab170c61c02ec 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -1,3 +1,15 @@
+//===- llvm/Telemetry/Telemetry.h - Telemetry -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the commom Telemetry framework.
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef LVM_TELEMETRY_TELEMETRY_H
 #define LVM_TELEMETRY_TELEMETRY_H
 

>From e25f5fcd79f84086f4fddb7288d353cf0c0858c0 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 14:25:22 -0400
Subject: [PATCH 03/10] fixed typo

---
 llvm/include/llvm/Telemetry/Telemetry.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index fab170c61c02ec..dc24f9c3fc3fb8 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -5,13 +5,9 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// Defines the commom Telemetry framework.
-//
-//===----------------------------------------------------------------------===//
 
-#ifndef LVM_TELEMETRY_TELEMETRY_H
-#define LVM_TELEMETRY_TELEMETRY_H
+#ifndef LLVM_TELEMETRY_TELEMETRY_H
+#define LLVM_TELEMETRY_TELEMETRY_H
 
 #include <chrono>
 #include <ctime>
@@ -108,4 +104,4 @@ class Telemeter {
 } // namespace telemetry
 } // namespace llvm
 
-#endif // LVM_TELEMETRY_TELEMETRY_H
+#endif // LLVM_TELEMETRY_TELEMETRY_H

>From 0057bcf63de085a4d41184eac7d4d4fe9ba7f299 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 28 Aug 2024 22:12:30 -0400
Subject: [PATCH 04/10] add tests and addressed review comments

---
 llvm/include/llvm/Telemetry/Telemetry.h    |  53 +-
 llvm/lib/Telemetry/Telemetry.cpp           |  31 +-
 llvm/unittests/CMakeLists.txt              |   1 +
 llvm/unittests/Telemetry/CMakeLists.txt    |   9 +
 llvm/unittests/Telemetry/TelemetryTest.cpp | 606 +++++++++++++++++++++
 5 files changed, 652 insertions(+), 48 deletions(-)
 create mode 100644 llvm/unittests/Telemetry/CMakeLists.txt
 create mode 100644 llvm/unittests/Telemetry/TelemetryTest.cpp

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index dc24f9c3fc3fb8..935b1feddbed6e 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -27,35 +27,37 @@ using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
 
 struct TelemetryConfig {
   // If true, telemetry will be enabled.
-  bool enable_telemetry;
+  bool EnableTelemetry;
 
   // Additional destinations to send the logged entries.
   // Could be stdout, stderr, or some local paths.
   // Note: these are destinations are __in addition to__ whatever the default
   // destination(s) are, as implemented by vendors.
-  std::vector<std::string> additional_destinations;
+  std::vector<std::string> AdditionalDestinations;
 };
 
 struct TelemetryEventStats {
   // REQUIRED: Start time of event
-  SteadyTimePoint m_start;
+  SteadyTimePoint Start;
   // OPTIONAL: End time of event - may be empty if not meaningful.
-  std::optional<SteadyTimePoint> m_end;
+  std::optional<SteadyTimePoint> End;
   // TBD: could add some memory stats here too?
 
   TelemetryEventStats() = default;
-  TelemetryEventStats(SteadyTimePoint start) : m_start(start) {}
-  TelemetryEventStats(SteadyTimePoint start, SteadyTimePoint end)
-      : m_start(start), m_end(end) {}
-
-  std::string ToString() const;
+  TelemetryEventStats(SteadyTimePoint Start) : Start(Start) {}
+  TelemetryEventStats(SteadyTimePoint Start, SteadyTimePoint End)
+      : Start(Start), End(End) {}
 };
 
 struct ExitDescription {
-  int exit_code;
-  std::string description;
+  int ExitCode;
+  std::string Description;
+};
 
-  std::string ToString() const;
+// For isa, dyn_cast, etc operations on TelemetryInfo.
+typedef unsigned KindType;
+struct EntryKind {
+  static const KindType Base = 0;
 };
 
 // The base class contains the basic set of data.
@@ -64,41 +66,46 @@ struct TelemetryInfo {
   // A "session" corresponds to every time the tool starts.
   // All entries emitted for the same session will have
   // the same session_uuid
-  std::string session_uuid;
+  std::string SessionUuid;
 
-  TelemetryEventStats stats;
+  TelemetryEventStats Stats;
 
-  std::optional<ExitDescription> exit_description;
+  std::optional<ExitDescription> ExitDesc;
 
   // Counting number of entries.
   // (For each set of entries with the same session_uuid, this value should
   // be unique for each entry)
-  size_t counter;
+  size_t Counter;
 
   TelemetryInfo() = default;
   ~TelemetryInfo() = default;
-  virtual std::string ToString() const;
+
+  virtual json::Object serializeToJson() const;
+
+  // For isa, dyn_cast, etc, operations.
+  virtual KindType getEntryKind() const { return EntryKind::Base; }
+  static bool classof(const TelemetryInfo* T) {
+    return T->getEntryKind() == EntryKind::Base;
+  }
 };
 
 // Where/how to send the telemetry entries.
 class TelemetryDestination {
 public:
   virtual ~TelemetryDestination() = default;
-  virtual Error EmitEntry(const TelemetryInfo *entry) = 0;
+  virtual Error emitEntry(const TelemetryInfo *Entry) = 0;
   virtual std::string name() const = 0;
 };
 
 class Telemeter {
 public:
-  virtual ~Telemeter() = default;
-
   // Invoked upon tool startup
-  virtual void LogStartup(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+  virtual void logStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
 
   // Invoked upon tool exit.
-  virtual void LogExit(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+  virtual void logExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
 
-  virtual void AddDestination(TelemetryDestination *destination) = 0;
+  virtual void addDestination(TelemetryDestination *Destination) = 0;
 };
 
 } // namespace telemetry
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
index f7100685ee2d2b..b9605c4a38ecd0 100644
--- a/llvm/lib/Telemetry/Telemetry.cpp
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -3,30 +3,11 @@
 namespace llvm {
 namespace telemetry {
 
-std::string TelemetryEventStats::ToString() const {
-  std::string result;
-  llvm::raw_string_ostream os(result);
-  os << "start_timestamp: " << m_start.time_since_epoch().count()
-     << ", end_timestamp: "
-     << (m_end.has_value() ? std::to_string(m_end->time_since_epoch().count())
-                           : "<NONE>");
-  return result;
-}
-
-std::string ExitDescription::ToString() const {
-  return "exit_code: " + std::to_string(exit_code) +
-         ", description: " + description + "\n";
-}
-
-std::string TelemetryInfo::ToString() const {
-  return "[TelemetryInfo]\n" + ("  session_uuid:" + session_uuid + "\n") +
-         ("  stats: " + stats.ToString() + "\n") +
-         ("  exit_description: " +
-          (exit_description.has_value() ? exit_description->ToString()
-                                        : "<NONE>") +
-          "\n") +
-         ("  counter: " + std::to_string(counter) + "\n");
-}
+llvm::json::Object TelemetryInfo::serializeToJson() const {
+  return json::Object {
+    {"UUID", SessionUuid},
+  };
+};
 
 } // namespace telemetry
-} // namespace llvm
\ No newline at end of file
+} // namespace llvm
diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt
index 911ede701982f6..9d6b3999c43958 100644
--- a/llvm/unittests/CMakeLists.txt
+++ b/llvm/unittests/CMakeLists.txt
@@ -49,6 +49,7 @@ add_subdirectory(Support)
 add_subdirectory(TableGen)
 add_subdirectory(Target)
 add_subdirectory(TargetParser)
+add_subdirectory(Telemetry)
 add_subdirectory(Testing)
 add_subdirectory(TextAPI)
 add_subdirectory(Transforms)
diff --git a/llvm/unittests/Telemetry/CMakeLists.txt b/llvm/unittests/Telemetry/CMakeLists.txt
new file mode 100644
index 00000000000000..a40ae4b2f55607
--- /dev/null
+++ b/llvm/unittests/Telemetry/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Telemetry
+  Core
+  Support
+  )
+
+add_llvm_unittest(TelemetryTests
+  TelemetryTest.cpp
+  )
diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
new file mode 100644
index 00000000000000..36a4daf55e1018
--- /dev/null
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -0,0 +1,606 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+#include <cstdio>
+#include <vector>
+#include <chrono>
+#include <ctime>
+
+#include <iostream>  // TODO: remove this
+
+// Testing parameters.ve
+static thread_local bool HasExitError = false;
+static thread_local std::string ExitMsg = "";
+static thread_local bool HasVendorConfig = false;
+static thread_local bool SanitizeData = false;
+static thread_local std::string Buffer = "";
+static thread_local std::vector<llvm::json::Object> EmittedJsons;
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generate unique (but deterministic "uuid" for testing purposes).
+static std::string nextUuid() {
+  static size_t seed = 1111;
+  return std::to_string(seed++);
+}
+
+struct VendorEntryKind {
+   // TODO: should avoid dup with other vendors' Types?
+  static const KindType VendorCommon = 0b010101000;
+  static const KindType Startup      = 0b010101001;
+  static const KindType Exit         = 0b010101010;
+ };
+
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization method.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+
+  static bool classof(const TelemetryInfo* T) {
+    // Subclasses of this is also acceptable.
+    return (T->getEntryKind() & VendorEntryKind::VendorCommon)  == VendorEntryKind::VendorCommon;
+
+  }
+
+  KindType getEntryKind() const override { return VendorEntryKind::VendorCommon;}
+
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
+};
+
+
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  StartupEvent(const StartupEvent &E) {
+    SessionUuid = E.SessionUuid;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+
+    MagicStartupMsg = E.MagicStartupMsg;
+  }
+
+  static bool classof(const TelemetryInfo* T) {
+    return T->getEntryKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getEntryKind() const override { return VendorEntryKind::Startup;}
+
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS<< "UUID:" << SessionUuid << "\n";
+    OS << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  json::Object serializeToJson() const override {
+    return json::Object{
+      {"Startup", { {"UUID", SessionUuid},
+                    {"MagicStartupMsg", MagicStartupMsg}}},
+    };
+  }
+};
+
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Provide a copy ctor because we may need to make a copy
+  // before sanitizing the Entry.
+  ExitEvent(const ExitEvent &E) {
+    SessionUuid = E.SessionUuid;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+
+    MagicExitMsg = E.MagicExitMsg;
+  }
+
+  static bool classof(const TelemetryInfo* T) {
+    return T->getEntryKind() == VendorEntryKind::Exit;
+  }
+
+  unsigned getEntryKind() const override { return VendorEntryKind::Exit;}
+
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "UUID:" << SessionUuid << "\n";
+    if (ExitDesc.has_value())
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  json::Object serializeToJson() const override {
+    json::Array I =  json::Array{
+        {"UUID", SessionUuid},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc.has_value())
+      I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object {
+      {"Exit", std::move(I)},
+    };
+  }
+};
+
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) {
+    SessionUuid = E.SessionUuid;
+    Stats = E.Stats;
+    ExitDesc = E.ExitDesc;
+    Counter = E.Counter;
+
+    Msgs = E.Msgs;
+  }
+
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      OS << "MSG_" << I << ":" << M << "\n";
+      ++I;
+    }
+  }
+
+  json::Object serializeToJson() const  override {
+    json::Object Inner;
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+      ++I;
+    }
+    return json::Object {
+      {"Midpoint", std::move(Inner)}}
+    ;
+  }
+};
+
+
+// The following classes demonstrate how downstream code can
+// define one or more custom TelemetryDestination(s) to handle
+// Telemetry data differently, specifically:
+//    + which data to send (fullset or sanitized)
+//    + where to send the data
+//    + in what form
+
+const std::string STRING_DEST( "STRING");
+const std::string JSON_DEST ("JSON");
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public TelemetryDestination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  StringDestination(bool ShouldSanitize, std::string& Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {
+  }
+
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    if (isa<VendorCommonTelemetryInfo>(Entry)) {
+      if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
+        if (ShouldSanitize) {
+          if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+            // There is nothing to sanitize for this type of data, so keep as-is.
+            E->serializeToStream(OS);
+          } else if (isa<CustomTelemetryEvent>(E)) {
+            auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
+            Sanitized.serializeToStream(OS);
+          } else {
+            llvm_unreachable("unexpected type");
+          }
+        } else {
+          E->serializeToStream(OS);
+        }
+      }
+    } else {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "UUID:" << Entry->SessionUuid << "\n";
+    }
+    return Error::success();
+  }
+
+  std::string name() const override { return STRING_DEST;}
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent* Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away.
+    int S = Sanitized.Msgs.size() - 1;
+    for (int I = S % 2 == 0 ? S - 1 : S; I >= 0; I -= 2)
+      Sanitized.Msgs[I]="";
+      return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public TelemetryDestination {
+public:
+  JsonStreamDestination(bool ShouldSanitize)
+      : ShouldSanitize(ShouldSanitize) {}
+
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
+        if (ShouldSanitize) {
+          if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+            // There is nothing to sanitize for this type of data, so keep as-is.
+            return SendToBlackbox(E->serializeToJson());
+          } else if (isa<CustomTelemetryEvent>(E)) {
+            auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
+            return SendToBlackbox(Sanitized.serializeToJson());
+          } else {
+            llvm_unreachable("unexpected type");
+          }
+        } else {
+          return SendToBlackbox(E->serializeToJson());
+        }
+      }  else {
+      // Unfamiliar entries, just send the entry's UUID
+     return SendToBlackbox(json::Object{{"UUID", Entry->SessionUuid}});
+    }
+    return make_error<StringError>("unhandled codepath in emitEntry",
+                                        inconvertibleErrorCode());
+  }
+
+  std::string name() const override { return JSON_DEST;}
+private:
+
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent* Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away.
+    int S = Sanitized.Msgs.size() - 1;
+    for (int I = S % 2 == 0 ? S : S - 1; I >= 0; I -= 2)
+      Sanitized.Msgs[I]="";
+
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+  bool ShouldSanitize;
+};
+
+// Custom vendor-defined Telemeter that has additional data-collection point.
+class TestTelemeter : public Telemeter {
+public:
+  TestTelemeter(std::string SessionUuid) : Uuid(SessionUuid), Counter(0) {}
+
+  static std::unique_ptr<TestTelemeter> createInstance(TelemetryConfig *config) {
+    if (!config->EnableTelemetry) return std::unique_ptr<TestTelemeter>(nullptr);
+      std::unique_ptr<TestTelemeter> Telemeter = std::make_unique<TestTelemeter>(nextUuid());
+      // Set up Destination based on the given config.
+      for (const std::string &Dest : config->AdditionalDestinations) {
+        // The destination(s) are ALSO defined by vendor, so it should understand
+        // what the name of each destination signifies.
+        if (Dest == JSON_DEST) {
+          Telemeter->addDestination(new vendor_code::JsonStreamDestination(SanitizeData));
+        } else if (Dest == STRING_DEST) {
+          Telemeter->addDestination(new vendor_code::StringDestination(SanitizeData, Buffer));
+        } else {
+          llvm_unreachable(llvm::Twine("unknown destination: ", Dest).str().c_str());
+        }
+      }
+      return Telemeter;
+  }
+
+  void logStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    ToolName = ToolPath.str();
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto* S = dyn_cast<StartupEvent>(Entry)) {
+      S->MagicStartupMsg = llvm::Twine("One_", ToolPath).str();
+    }
+    emitToDestinations(Entry);
+  }
+
+  void logExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    // Ensure we're shutting down the same tool we started with.
+    if (ToolPath != ToolName){
+      std::string Str;
+      raw_string_ostream OS(Str);
+      OS << "Expected tool with name" << ToolName << ", but got " << ToolPath;
+      llvm_unreachable(Str.c_str());
+    }
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto * E = dyn_cast<ExitEvent>(Entry)) {
+      E->MagicExitMsg = llvm::Twine("Three_", ToolPath).str();
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  void addDestination(TelemetryDestination* Dest) override {
+    Destinations.push_back(Dest);
+  }
+
+  void logMidpoint(TelemetryInfo *Entry) {
+    // The custom Telemeter can record and send additional data.
+    if (auto * C = dyn_cast<CustomTelemetryEvent>(Entry)) {
+      C->Msgs.push_back("Two");
+      C->Msgs.push_back("Deux");
+      C->Msgs.push_back("Zwei");
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  ~TestTelemeter() {
+    for (auto* Dest : Destinations)
+      delete Dest;
+  }
+
+  template <typename T>
+  T makeDefaultTelemetryInfo() {
+    T Ret;
+    Ret.SessionUuid = Uuid;
+    Ret.Counter = Counter++;
+    return Ret;
+  }
+private:
+
+  void emitToDestinations(TelemetryInfo *Entry) {
+    for (TelemetryDestination *Dest : Destinations) {
+      llvm::Error err = Dest->emitEntry(Entry);
+      if(err) {
+        // Log it and move on.
+      }
+    }
+  }
+
+  const std::string Uuid;
+  size_t Counter;
+  std::string ToolName;
+  std::vector<TelemetryDestination *> Destinations;
+};
+
+// Pretend to be a "weakly" defined vendor-specific function.
+void ApplyVendorSpecificConfigs(TelemetryConfig *config) {
+  config->EnableTelemetry = true;
+}
+
+} // namespace vendor_code
+} // namespace telemetry
+} // namespace llvm
+
+namespace {
+
+void ApplyCommonConfig(llvm::telemetry::TelemetryConfig* config) {
+  // Any shareable configs for the upstream tool can go here.
+  // .....
+}
+
+std::shared_ptr<llvm::telemetry::TelemetryConfig> GetTelemetryConfig() {
+  // Telemetry is disabled by default.
+  // The vendor can enable in their config.
+  auto Config = std::make_shared<llvm::telemetry::TelemetryConfig>();
+  Config->EnableTelemetry = false;
+
+  ApplyCommonConfig(Config.get());
+
+  // Apply vendor specific config, if present.
+  // In practice, this would be a build-time param.
+  // Eg:
+  //
+  // #ifdef HAS_VENDOR_TELEMETRY_CONFIG
+  //     llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(config.get());
+  // #endif
+  // But for unit testing, we use the testing params defined at the top.
+  if (HasVendorConfig) {
+    llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Config.get());
+  }
+  return Config;
+}
+
+using namespace llvm;
+using namespace llvm::telemetry;
+
+// For deterministic tests, pre-defined certain important time-points
+// rather than using now().
+//
+// Preset StartTime to EPOCH.
+auto StartTime = std::chrono::time_point<std::chrono::steady_clock>{};
+// Pretend the time it takes for the tool's initialization is EPOCH + 5 milliseconds
+auto InitCompleteTime = StartTime + std::chrono::milliseconds(5);
+auto MidPointTime = StartTime + std::chrono::milliseconds(10);
+auto MidPointCompleteTime = MidPointTime + std::chrono::milliseconds(5);
+// Preset ExitTime to EPOCH + 20 milliseconds
+auto ExitTime = StartTime + std::chrono::milliseconds(20);
+// Pretend the time it takes to complete tearing down the tool is 10 milliseconds.
+auto ExitCompleteTime = ExitTime + std::chrono::milliseconds(10);
+
+void AtToolStart(std::string ToolName, vendor_code::TestTelemeter* T) {
+  vendor_code::StartupEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
+  Entry.Stats = {StartTime, InitCompleteTime};
+  T->logStartup(ToolName, &Entry);
+}
+
+void AtToolExit(std::string ToolName, vendor_code::TestTelemeter* T) {
+  vendor_code::ExitEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
+  Entry.Stats = {ExitTime, ExitCompleteTime};
+
+  if (HasExitError) {
+    Entry.ExitDesc = {1, ExitMsg};
+  }
+  T->logExit(ToolName, &Entry);
+}
+
+void AtToolMidPoint (vendor_code::TestTelemeter* T) {
+  vendor_code::CustomTelemetryEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
+  Entry.Stats = {MidPointTime, MidPointCompleteTime};
+  T->logMidpoint(&Entry);
+}
+
+// Helper function to print the given object content to string.
+static std::string ValueToString(const json::Value* V) {
+  std::string Ret;
+  llvm::raw_string_ostream P(Ret);
+  P << *V;
+  return Ret;
+}
+
+// Without vendor's implementation, telemetry is not enabled by default.
+TEST(TelemetryTest, TelemetryDefault) {
+  HasVendorConfig = false;
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+
+  EXPECT_EQ(nullptr, Tool.get());
+}
+
+TEST(TelemetryTest, TelemetryEnabled) {
+  const std::string ToolName = "TestToolOne";
+
+  // Preset some test params.
+  HasVendorConfig = true;
+  SanitizeData = false;
+  Buffer = "";
+  EmittedJsons.clear();
+
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+
+  // Add some destinations
+  Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
+  Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST);
+
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+
+  AtToolStart(ToolName, Tool.get());
+  AtToolMidPoint(Tool.get());
+  AtToolExit(ToolName, Tool.get());
+
+
+  // Check that the StringDestination emitted properly
+  {
+    std::string ExpectedBuff = "UUID:1111\n"
+                               "MagicStartupMsg:One_TestToolOne\n"
+                               "MSG_0:Two\n"
+                               "MSG_1:Deux\n"
+                               "MSG_2:Zwei\n"
+                               "UUID:1111\n"
+                               "MagicExitMsg:Three_TestToolOne\n";
+
+    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+  }
+
+  // Check that the JsonDestination emitted properly
+  {
+
+    // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
+    EXPECT_EQ(3, EmittedJsons.size());
+
+    const json::Value* StartupEntry = EmittedJsons[0].get("Startup");
+    ASSERT_NE(StartupEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+                 ValueToString(StartupEntry).c_str());
+
+    const json::Value* MidpointEntry = EmittedJsons[1].get("Midpoint");
+    ASSERT_NE(MidpointEntry, nullptr);
+    EXPECT_STREQ("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\"}",
+                 ValueToString(MidpointEntry).c_str());
+
+    const json::Value* ExitEntry = EmittedJsons[2].get("Exit");
+    ASSERT_NE(ExitEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+                 ValueToString(ExitEntry).c_str());
+  }
+}
+
+// Similar to previous tests, but toggling the data-sanitization option ON.
+// The recorded data should have some fields removed.
+TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
+  const std::string ToolName = "TestToolOne";
+
+  // Preset some test params.
+  HasVendorConfig = true;
+  SanitizeData = true;
+  Buffer = "";
+  EmittedJsons.clear();
+
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+
+  // Add some destinations
+  Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
+  Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST);
+
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+
+  AtToolStart(ToolName, Tool.get());
+  AtToolMidPoint(Tool.get());
+  AtToolExit(ToolName, Tool.get());
+
+
+  // Check that the StringDestination emitted properly
+  {
+    // The StringDestination should have removed the odd-positioned msgs.
+    std::string ExpectedBuff = "UUID:1111\n"
+                               "MagicStartupMsg:One_TestToolOne\n"
+                               "MSG_0:Two\n"
+                               "MSG_1:\n"    // was sannitized away.
+                               "MSG_2:Zwei\n"
+                               "UUID:1111\n"
+                               "MagicExitMsg:Three_TestToolOne\n";
+
+    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+  }
+
+  // Check that the JsonDestination emitted properly
+  {
+
+    // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
+    EXPECT_EQ(3, EmittedJsons.size());
+
+    const json::Value* StartupEntry = EmittedJsons[0].get("Startup");
+    ASSERT_NE(StartupEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+                 ValueToString(StartupEntry).c_str());
+
+    const json::Value* MidpointEntry = EmittedJsons[1].get("Midpoint");
+    ASSERT_NE(MidpointEntry, nullptr);
+    // The JsonDestination should have removed the even-positioned msgs.
+    EXPECT_STREQ("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\"}",
+                 ValueToString(MidpointEntry).c_str());
+
+    const json::Value* ExitEntry = EmittedJsons[2].get("Exit");
+    ASSERT_NE(ExitEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+                 ValueToString(ExitEntry).c_str());
+  }
+}
+
+} // namespace

>From 750e4ac6af7bda76fb730b44b16b86279df42e15 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 28 Aug 2024 22:24:43 -0400
Subject: [PATCH 05/10] formatting changes

---
 llvm/include/llvm/Telemetry/Telemetry.h    |   2 +-
 llvm/lib/Telemetry/Telemetry.cpp           |   4 +-
 llvm/unittests/Telemetry/TelemetryTest.cpp | 236 +++++++++++----------
 3 files changed, 125 insertions(+), 117 deletions(-)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index 935b1feddbed6e..7084b81c0304ae 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -84,7 +84,7 @@ struct TelemetryInfo {
 
   // For isa, dyn_cast, etc, operations.
   virtual KindType getEntryKind() const { return EntryKind::Base; }
-  static bool classof(const TelemetryInfo* T) {
+  static bool classof(const TelemetryInfo *T) {
     return T->getEntryKind() == EntryKind::Base;
   }
 };
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
index b9605c4a38ecd0..f49b88e07f1361 100644
--- a/llvm/lib/Telemetry/Telemetry.cpp
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -4,8 +4,8 @@ namespace llvm {
 namespace telemetry {
 
 llvm::json::Object TelemetryInfo::serializeToJson() const {
-  return json::Object {
-    {"UUID", SessionUuid},
+  return json::Object{
+      {"UUID", SessionUuid},
   };
 };
 
diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
index 36a4daf55e1018..b89bc584717da5 100644
--- a/llvm/unittests/Telemetry/TelemetryTest.cpp
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -20,12 +20,10 @@
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
-#include <cstdio>
-#include <vector>
 #include <chrono>
 #include <ctime>
+#include <vector>
 
-#include <iostream>  // TODO: remove this
 
 // Testing parameters.ve
 static thread_local bool HasExitError = false;
@@ -46,30 +44,30 @@ static std::string nextUuid() {
 }
 
 struct VendorEntryKind {
-   // TODO: should avoid dup with other vendors' Types?
+  // TODO: should avoid dup with other vendors' Types?
   static const KindType VendorCommon = 0b010101000;
-  static const KindType Startup      = 0b010101001;
-  static const KindType Exit         = 0b010101010;
- };
-
+  static const KindType Startup = 0b010101001;
+  static const KindType Exit = 0b010101010;
+};
 
 // Demonstrates that the TelemetryInfo (data courier) struct can be extended
 // by downstream code to store additional data as needed.
 // It can also define additional data serialization method.
 struct VendorCommonTelemetryInfo : public TelemetryInfo {
 
-  static bool classof(const TelemetryInfo* T) {
+  static bool classof(const TelemetryInfo *T) {
     // Subclasses of this is also acceptable.
-    return (T->getEntryKind() & VendorEntryKind::VendorCommon)  == VendorEntryKind::VendorCommon;
-
+    return (T->getEntryKind() & VendorEntryKind::VendorCommon) ==
+           VendorEntryKind::VendorCommon;
   }
 
-  KindType getEntryKind() const override { return VendorEntryKind::VendorCommon;}
+  KindType getEntryKind() const override {
+    return VendorEntryKind::VendorCommon;
+  }
 
   virtual void serializeToStream(llvm::raw_ostream &OS) const = 0;
 };
 
-
 struct StartupEvent : public VendorCommonTelemetryInfo {
   std::string MagicStartupMsg;
 
@@ -83,21 +81,21 @@ struct StartupEvent : public VendorCommonTelemetryInfo {
     MagicStartupMsg = E.MagicStartupMsg;
   }
 
-  static bool classof(const TelemetryInfo* T) {
+  static bool classof(const TelemetryInfo *T) {
     return T->getEntryKind() == VendorEntryKind::Startup;
   }
 
-  KindType getEntryKind() const override { return VendorEntryKind::Startup;}
+  KindType getEntryKind() const override { return VendorEntryKind::Startup; }
 
   void serializeToStream(llvm::raw_ostream &OS) const override {
-    OS<< "UUID:" << SessionUuid << "\n";
+    OS << "UUID:" << SessionUuid << "\n";
     OS << "MagicStartupMsg:" << MagicStartupMsg << "\n";
   }
 
   json::Object serializeToJson() const override {
     return json::Object{
-      {"Startup", { {"UUID", SessionUuid},
-                    {"MagicStartupMsg", MagicStartupMsg}}},
+        {"Startup",
+         {{"UUID", SessionUuid}, {"MagicStartupMsg", MagicStartupMsg}}},
     };
   }
 };
@@ -117,11 +115,11 @@ struct ExitEvent : public VendorCommonTelemetryInfo {
     MagicExitMsg = E.MagicExitMsg;
   }
 
-  static bool classof(const TelemetryInfo* T) {
+  static bool classof(const TelemetryInfo *T) {
     return T->getEntryKind() == VendorEntryKind::Exit;
   }
 
-  unsigned getEntryKind() const override { return VendorEntryKind::Exit;}
+  unsigned getEntryKind() const override { return VendorEntryKind::Exit; }
 
   void serializeToStream(llvm::raw_ostream &OS) const override {
     OS << "UUID:" << SessionUuid << "\n";
@@ -131,14 +129,14 @@ struct ExitEvent : public VendorCommonTelemetryInfo {
   }
 
   json::Object serializeToJson() const override {
-    json::Array I =  json::Array{
+    json::Array I = json::Array{
         {"UUID", SessionUuid},
         {"MagicExitMsg", MagicExitMsg},
     };
     if (ExitDesc.has_value())
       I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
-    return json::Object {
-      {"Exit", std::move(I)},
+    return json::Object{
+        {"Exit", std::move(I)},
     };
   }
 };
@@ -164,20 +162,17 @@ struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
     }
   }
 
-  json::Object serializeToJson() const  override {
+  json::Object serializeToJson() const override {
     json::Object Inner;
     int I = 0;
     for (const std::string &M : Msgs) {
       Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
       ++I;
     }
-    return json::Object {
-      {"Midpoint", std::move(Inner)}}
-    ;
+    return json::Object{{"Midpoint", std::move(Inner)}};
   }
 };
 
-
 // The following classes demonstrate how downstream code can
 // define one or more custom TelemetryDestination(s) to handle
 // Telemetry data differently, specifically:
@@ -185,24 +180,24 @@ struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
 //    + where to send the data
 //    + in what form
 
-const std::string STRING_DEST( "STRING");
-const std::string JSON_DEST ("JSON");
+const std::string STRING_DEST("STRING");
+const std::string JSON_DEST("JSON");
 
 // This Destination sends data to a std::string given at ctor.
 class StringDestination : public TelemetryDestination {
 public:
   // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
   // the full set.
-  StringDestination(bool ShouldSanitize, std::string& Buf)
-      : ShouldSanitize(ShouldSanitize), OS(Buf) {
-  }
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
 
   Error emitEntry(const TelemetryInfo *Entry) override {
     if (isa<VendorCommonTelemetryInfo>(Entry)) {
       if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
         if (ShouldSanitize) {
           if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
-            // There is nothing to sanitize for this type of data, so keep as-is.
+            // There is nothing to sanitize for this type of data, so keep
+            // as-is.
             E->serializeToStream(OS);
           } else if (isa<CustomTelemetryEvent>(E)) {
             auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
@@ -221,65 +216,63 @@ class StringDestination : public TelemetryDestination {
     return Error::success();
   }
 
-  std::string name() const override { return STRING_DEST;}
+  std::string name() const override { return STRING_DEST; }
 
 private:
   // Returns a copy of the given entry, but with some fields sanitized.
-  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent* Entry) {
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
     CustomTelemetryEvent Sanitized(*Entry);
     // Pretend that messages stored at ODD positions are "sensitive",
     // hence need to be sanitized away.
     int S = Sanitized.Msgs.size() - 1;
     for (int I = S % 2 == 0 ? S - 1 : S; I >= 0; I -= 2)
-      Sanitized.Msgs[I]="";
-      return Sanitized;
+      Sanitized.Msgs[I] = "";
+    return Sanitized;
   }
 
   bool ShouldSanitize;
   llvm::raw_string_ostream OS;
-
 };
 
 // This Destination sends data to some "blackbox" in form of JSON.
 class JsonStreamDestination : public TelemetryDestination {
 public:
-  JsonStreamDestination(bool ShouldSanitize)
-      : ShouldSanitize(ShouldSanitize) {}
+  JsonStreamDestination(bool ShouldSanitize) : ShouldSanitize(ShouldSanitize) {}
 
   Error emitEntry(const TelemetryInfo *Entry) override {
     if (auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry)) {
-        if (ShouldSanitize) {
-          if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
-            // There is nothing to sanitize for this type of data, so keep as-is.
-            return SendToBlackbox(E->serializeToJson());
-          } else if (isa<CustomTelemetryEvent>(E)) {
-            auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
-            return SendToBlackbox(Sanitized.serializeToJson());
-          } else {
-            llvm_unreachable("unexpected type");
-          }
-        } else {
+      if (ShouldSanitize) {
+        if (isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+          // There is nothing to sanitize for this type of data, so keep as-is.
           return SendToBlackbox(E->serializeToJson());
+        } else if (isa<CustomTelemetryEvent>(E)) {
+          auto Sanitized = sanitizeFields(dyn_cast<CustomTelemetryEvent>(E));
+          return SendToBlackbox(Sanitized.serializeToJson());
+        } else {
+          llvm_unreachable("unexpected type");
         }
-      }  else {
+      } else {
+        return SendToBlackbox(E->serializeToJson());
+      }
+    } else {
       // Unfamiliar entries, just send the entry's UUID
-     return SendToBlackbox(json::Object{{"UUID", Entry->SessionUuid}});
+      return SendToBlackbox(json::Object{{"UUID", Entry->SessionUuid}});
     }
     return make_error<StringError>("unhandled codepath in emitEntry",
-                                        inconvertibleErrorCode());
+                                   inconvertibleErrorCode());
   }
 
-  std::string name() const override { return JSON_DEST;}
-private:
+  std::string name() const override { return JSON_DEST; }
 
+private:
   // Returns a copy of the given entry, but with some fields sanitized.
-  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent* Entry) {
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
     CustomTelemetryEvent Sanitized(*Entry);
     // Pretend that messages stored at EVEN positions are "sensitive",
     // hence need to be sanitized away.
     int S = Sanitized.Msgs.size() - 1;
     for (int I = S % 2 == 0 ? S : S - 1; I >= 0; I -= 2)
-      Sanitized.Msgs[I]="";
+      Sanitized.Msgs[I] = "";
 
     return Sanitized;
   }
@@ -300,29 +293,35 @@ class TestTelemeter : public Telemeter {
 public:
   TestTelemeter(std::string SessionUuid) : Uuid(SessionUuid), Counter(0) {}
 
-  static std::unique_ptr<TestTelemeter> createInstance(TelemetryConfig *config) {
-    if (!config->EnableTelemetry) return std::unique_ptr<TestTelemeter>(nullptr);
-      std::unique_ptr<TestTelemeter> Telemeter = std::make_unique<TestTelemeter>(nextUuid());
-      // Set up Destination based on the given config.
-      for (const std::string &Dest : config->AdditionalDestinations) {
-        // The destination(s) are ALSO defined by vendor, so it should understand
-        // what the name of each destination signifies.
-        if (Dest == JSON_DEST) {
-          Telemeter->addDestination(new vendor_code::JsonStreamDestination(SanitizeData));
-        } else if (Dest == STRING_DEST) {
-          Telemeter->addDestination(new vendor_code::StringDestination(SanitizeData, Buffer));
-        } else {
-          llvm_unreachable(llvm::Twine("unknown destination: ", Dest).str().c_str());
-        }
+  static std::unique_ptr<TestTelemeter>
+  createInstance(TelemetryConfig *config) {
+    if (!config->EnableTelemetry)
+      return std::unique_ptr<TestTelemeter>(nullptr);
+    std::unique_ptr<TestTelemeter> Telemeter =
+        std::make_unique<TestTelemeter>(nextUuid());
+    // Set up Destination based on the given config.
+    for (const std::string &Dest : config->AdditionalDestinations) {
+      // The destination(s) are ALSO defined by vendor, so it should understand
+      // what the name of each destination signifies.
+      if (Dest == JSON_DEST) {
+        Telemeter->addDestination(
+            new vendor_code::JsonStreamDestination(SanitizeData));
+      } else if (Dest == STRING_DEST) {
+        Telemeter->addDestination(
+            new vendor_code::StringDestination(SanitizeData, Buffer));
+      } else {
+        llvm_unreachable(
+            llvm::Twine("unknown destination: ", Dest).str().c_str());
       }
-      return Telemeter;
+    }
+    return Telemeter;
   }
 
   void logStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
     ToolName = ToolPath.str();
 
     // The vendor can add additional stuff to the entry before logging.
-    if (auto* S = dyn_cast<StartupEvent>(Entry)) {
+    if (auto *S = dyn_cast<StartupEvent>(Entry)) {
       S->MagicStartupMsg = llvm::Twine("One_", ToolPath).str();
     }
     emitToDestinations(Entry);
@@ -330,7 +329,7 @@ class TestTelemeter : public Telemeter {
 
   void logExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
     // Ensure we're shutting down the same tool we started with.
-    if (ToolPath != ToolName){
+    if (ToolPath != ToolName) {
       std::string Str;
       raw_string_ostream OS(Str);
       OS << "Expected tool with name" << ToolName << ", but got " << ToolPath;
@@ -338,20 +337,20 @@ class TestTelemeter : public Telemeter {
     }
 
     // The vendor can add additional stuff to the entry before logging.
-    if (auto * E = dyn_cast<ExitEvent>(Entry)) {
+    if (auto *E = dyn_cast<ExitEvent>(Entry)) {
       E->MagicExitMsg = llvm::Twine("Three_", ToolPath).str();
     }
 
     emitToDestinations(Entry);
   }
 
-  void addDestination(TelemetryDestination* Dest) override {
+  void addDestination(TelemetryDestination *Dest) override {
     Destinations.push_back(Dest);
   }
 
   void logMidpoint(TelemetryInfo *Entry) {
     // The custom Telemeter can record and send additional data.
-    if (auto * C = dyn_cast<CustomTelemetryEvent>(Entry)) {
+    if (auto *C = dyn_cast<CustomTelemetryEvent>(Entry)) {
       C->Msgs.push_back("Two");
       C->Msgs.push_back("Deux");
       C->Msgs.push_back("Zwei");
@@ -361,23 +360,22 @@ class TestTelemeter : public Telemeter {
   }
 
   ~TestTelemeter() {
-    for (auto* Dest : Destinations)
+    for (auto *Dest : Destinations)
       delete Dest;
   }
 
-  template <typename T>
-  T makeDefaultTelemetryInfo() {
+  template <typename T> T makeDefaultTelemetryInfo() {
     T Ret;
     Ret.SessionUuid = Uuid;
     Ret.Counter = Counter++;
     return Ret;
   }
-private:
 
+private:
   void emitToDestinations(TelemetryInfo *Entry) {
     for (TelemetryDestination *Dest : Destinations) {
       llvm::Error err = Dest->emitEntry(Entry);
-      if(err) {
+      if (err) {
         // Log it and move on.
       }
     }
@@ -400,7 +398,7 @@ void ApplyVendorSpecificConfigs(TelemetryConfig *config) {
 
 namespace {
 
-void ApplyCommonConfig(llvm::telemetry::TelemetryConfig* config) {
+void ApplyCommonConfig(llvm::telemetry::TelemetryConfig *config) {
   // Any shareable configs for the upstream tool can go here.
   // .....
 }
@@ -435,23 +433,27 @@ using namespace llvm::telemetry;
 //
 // Preset StartTime to EPOCH.
 auto StartTime = std::chrono::time_point<std::chrono::steady_clock>{};
-// Pretend the time it takes for the tool's initialization is EPOCH + 5 milliseconds
+// Pretend the time it takes for the tool's initialization is EPOCH + 5
+// milliseconds
 auto InitCompleteTime = StartTime + std::chrono::milliseconds(5);
 auto MidPointTime = StartTime + std::chrono::milliseconds(10);
 auto MidPointCompleteTime = MidPointTime + std::chrono::milliseconds(5);
 // Preset ExitTime to EPOCH + 20 milliseconds
 auto ExitTime = StartTime + std::chrono::milliseconds(20);
-// Pretend the time it takes to complete tearing down the tool is 10 milliseconds.
+// Pretend the time it takes to complete tearing down the tool is 10
+// milliseconds.
 auto ExitCompleteTime = ExitTime + std::chrono::milliseconds(10);
 
-void AtToolStart(std::string ToolName, vendor_code::TestTelemeter* T) {
-  vendor_code::StartupEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
+void AtToolStart(std::string ToolName, vendor_code::TestTelemeter *T) {
+  vendor_code::StartupEvent Entry =
+      T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
   Entry.Stats = {StartTime, InitCompleteTime};
   T->logStartup(ToolName, &Entry);
 }
 
-void AtToolExit(std::string ToolName, vendor_code::TestTelemeter* T) {
-  vendor_code::ExitEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
+void AtToolExit(std::string ToolName, vendor_code::TestTelemeter *T) {
+  vendor_code::ExitEvent Entry =
+      T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
   Entry.Stats = {ExitTime, ExitCompleteTime};
 
   if (HasExitError) {
@@ -460,14 +462,15 @@ void AtToolExit(std::string ToolName, vendor_code::TestTelemeter* T) {
   T->logExit(ToolName, &Entry);
 }
 
-void AtToolMidPoint (vendor_code::TestTelemeter* T) {
-  vendor_code::CustomTelemetryEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
+void AtToolMidPoint(vendor_code::TestTelemeter *T) {
+  vendor_code::CustomTelemetryEvent Entry =
+      T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
   Entry.Stats = {MidPointTime, MidPointCompleteTime};
   T->logMidpoint(&Entry);
 }
 
 // Helper function to print the given object content to string.
-static std::string ValueToString(const json::Value* V) {
+static std::string ValueToString(const json::Value *V) {
   std::string Ret;
   llvm::raw_string_ostream P(Ret);
   P << *V;
@@ -477,7 +480,8 @@ static std::string ValueToString(const json::Value* V) {
 // Without vendor's implementation, telemetry is not enabled by default.
 TEST(TelemetryTest, TelemetryDefault) {
   HasVendorConfig = false;
-  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
+      GetTelemetryConfig();
   auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
 
   EXPECT_EQ(nullptr, Tool.get());
@@ -492,7 +496,8 @@ TEST(TelemetryTest, TelemetryEnabled) {
   Buffer = "";
   EmittedJsons.clear();
 
-  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
+      GetTelemetryConfig();
 
   // Add some destinations
   Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
@@ -504,7 +509,6 @@ TEST(TelemetryTest, TelemetryEnabled) {
   AtToolMidPoint(Tool.get());
   AtToolExit(ToolName, Tool.get());
 
-
   // Check that the StringDestination emitted properly
   {
     std::string ExpectedBuff = "UUID:1111\n"
@@ -524,20 +528,22 @@ TEST(TelemetryTest, TelemetryEnabled) {
     // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
     EXPECT_EQ(3, EmittedJsons.size());
 
-    const json::Value* StartupEntry = EmittedJsons[0].get("Startup");
+    const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
-    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
-                 ValueToString(StartupEntry).c_str());
+    EXPECT_STREQ(
+        "[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+        ValueToString(StartupEntry).c_str());
 
-    const json::Value* MidpointEntry = EmittedJsons[1].get("Midpoint");
+    const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
     EXPECT_STREQ("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\"}",
                  ValueToString(MidpointEntry).c_str());
 
-    const json::Value* ExitEntry = EmittedJsons[2].get("Exit");
+    const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
-    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
-                 ValueToString(ExitEntry).c_str());
+    EXPECT_STREQ(
+        "[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+        ValueToString(ExitEntry).c_str());
   }
 }
 
@@ -552,7 +558,8 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
   Buffer = "";
   EmittedJsons.clear();
 
-  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
+      GetTelemetryConfig();
 
   // Add some destinations
   Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
@@ -564,14 +571,13 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
   AtToolMidPoint(Tool.get());
   AtToolExit(ToolName, Tool.get());
 
-
   // Check that the StringDestination emitted properly
   {
     // The StringDestination should have removed the odd-positioned msgs.
     std::string ExpectedBuff = "UUID:1111\n"
                                "MagicStartupMsg:One_TestToolOne\n"
                                "MSG_0:Two\n"
-                               "MSG_1:\n"    // was sannitized away.
+                               "MSG_1:\n" // was sannitized away.
                                "MSG_2:Zwei\n"
                                "UUID:1111\n"
                                "MagicExitMsg:Three_TestToolOne\n";
@@ -585,21 +591,23 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
     // There should be 3 events emitted by the Telemeter (start, midpoint, exit)
     EXPECT_EQ(3, EmittedJsons.size());
 
-    const json::Value* StartupEntry = EmittedJsons[0].get("Startup");
+    const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
-    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
-                 ValueToString(StartupEntry).c_str());
+    EXPECT_STREQ(
+        "[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+        ValueToString(StartupEntry).c_str());
 
-    const json::Value* MidpointEntry = EmittedJsons[1].get("Midpoint");
+    const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
     // The JsonDestination should have removed the even-positioned msgs.
     EXPECT_STREQ("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\"}",
                  ValueToString(MidpointEntry).c_str());
 
-    const json::Value* ExitEntry = EmittedJsons[2].get("Exit");
+    const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
-    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
-                 ValueToString(ExitEntry).c_str());
+    EXPECT_STREQ(
+        "[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+        ValueToString(ExitEntry).c_str());
   }
 }
 

>From 1378ed4c0358b2d9ff51a3a15766e20ce3472687 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Thu, 29 Aug 2024 11:32:13 -0400
Subject: [PATCH 06/10]  more formatting

---
 llvm/unittests/Telemetry/TelemetryTest.cpp | 88 +++++++++++++---------
 1 file changed, 54 insertions(+), 34 deletions(-)

diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
index b89bc584717da5..cd3c61478c0b34 100644
--- a/llvm/unittests/Telemetry/TelemetryTest.cpp
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -24,14 +24,17 @@
 #include <ctime>
 #include <vector>
 
-
-// Testing parameters.ve
+// Testing parameters.
+// These are set by each test to force certain outcomes.
+// Since the tests may be run in parellel, these should probably
+// be thread_local.
 static thread_local bool HasExitError = false;
 static thread_local std::string ExitMsg = "";
 static thread_local bool HasVendorConfig = false;
 static thread_local bool SanitizeData = false;
 static thread_local std::string Buffer = "";
 static thread_local std::vector<llvm::json::Object> EmittedJsons;
+static thread_local std::string ExpectedUuid = "";
 
 namespace llvm {
 namespace telemetry {
@@ -39,8 +42,8 @@ namespace vendor_code {
 
 // Generate unique (but deterministic "uuid" for testing purposes).
 static std::string nextUuid() {
-  static size_t seed = 1111;
-  return std::to_string(seed++);
+  static std::atomic<int> seed = 1111;
+  return std::to_string(seed.fetch_add(1, std::memory_order_acquire));
 }
 
 struct VendorEntryKind {
@@ -54,7 +57,6 @@ struct VendorEntryKind {
 // by downstream code to store additional data as needed.
 // It can also define additional data serialization method.
 struct VendorCommonTelemetryInfo : public TelemetryInfo {
-
   static bool classof(const TelemetryInfo *T) {
     // Subclasses of this is also acceptable.
     return (T->getEntryKind() & VendorEntryKind::VendorCommon) ==
@@ -155,6 +157,7 @@ struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
   }
 
   void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "UUID:" << SessionUuid << "\n";
     int I = 0;
     for (const std::string &M : Msgs) {
       OS << "MSG_" << I << ":" << M << "\n";
@@ -164,11 +167,13 @@ struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
 
   json::Object serializeToJson() const override {
     json::Object Inner;
+    Inner.try_emplace ("UUID", SessionUuid);
     int I = 0;
     for (const std::string &M : Msgs) {
       Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
       ++I;
     }
+
     return json::Object{{"Midpoint", std::move(Inner)}};
   }
 };
@@ -295,10 +300,12 @@ class TestTelemeter : public Telemeter {
 
   static std::unique_ptr<TestTelemeter>
   createInstance(TelemetryConfig *config) {
+    llvm::errs() << "============================== createInstance is called" << "\n";
     if (!config->EnableTelemetry)
-      return std::unique_ptr<TestTelemeter>(nullptr);
+      return nullptr;
+    ExpectedUuid = nextUuid();
     std::unique_ptr<TestTelemeter> Telemeter =
-        std::make_unique<TestTelemeter>(nextUuid());
+        std::make_unique<TestTelemeter>(ExpectedUuid);
     // Set up Destination based on the given config.
     for (const std::string &Dest : config->AdditionalDestinations) {
       // The destination(s) are ALSO defined by vendor, so it should understand
@@ -359,6 +366,8 @@ class TestTelemeter : public Telemeter {
     emitToDestinations(Entry);
   }
 
+  const std::string& getUuid() const {return Uuid;}
+
   ~TestTelemeter() {
     for (auto *Dest : Destinations)
       delete Dest;
@@ -412,12 +421,13 @@ std::shared_ptr<llvm::telemetry::TelemetryConfig> GetTelemetryConfig() {
   ApplyCommonConfig(Config.get());
 
   // Apply vendor specific config, if present.
-  // In practice, this would be a build-time param.
+  // In principle, this would be a build-time param, configured by the vendor.
   // Eg:
   //
   // #ifdef HAS_VENDOR_TELEMETRY_CONFIG
   //     llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(config.get());
   // #endif
+  //
   // But for unit testing, we use the testing params defined at the top.
   if (HasVendorConfig) {
     llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Config.get());
@@ -488,12 +498,12 @@ TEST(TelemetryTest, TelemetryDefault) {
 }
 
 TEST(TelemetryTest, TelemetryEnabled) {
-  const std::string ToolName = "TestToolOne";
+  const std::string ToolName = "TelemetryTest";
 
   // Preset some test params.
   HasVendorConfig = true;
   SanitizeData = false;
-  Buffer = "";
+  Buffer.clear();
   EmittedJsons.clear();
 
   std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
@@ -509,17 +519,21 @@ TEST(TelemetryTest, TelemetryEnabled) {
   AtToolMidPoint(Tool.get());
   AtToolExit(ToolName, Tool.get());
 
+  // Check that the Tool uses the expected UUID.
+  EXPECT_STREQ(Tool->getUuid().c_str(), ExpectedUuid.c_str());
+
   // Check that the StringDestination emitted properly
   {
-    std::string ExpectedBuff = "UUID:1111\n"
-                               "MagicStartupMsg:One_TestToolOne\n"
-                               "MSG_0:Two\n"
-                               "MSG_1:Deux\n"
-                               "MSG_2:Zwei\n"
-                               "UUID:1111\n"
-                               "MagicExitMsg:Three_TestToolOne\n";
+    std::string ExpectedBuffer = ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MagicStartupMsg:One_" + llvm::Twine(ToolName) + "\n" +
+                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MSG_0:Two\n" +
+                                  "MSG_1:Deux\n" +
+                                  "MSG_2:Zwei\n" +
+                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MagicExitMsg:Three_" + llvm::Twine(ToolName) + "\n").str();
 
-    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+    EXPECT_STREQ(ExpectedBuffer.c_str(), Buffer.c_str());
   }
 
   // Check that the JsonDestination emitted properly
@@ -531,18 +545,21 @@ TEST(TelemetryTest, TelemetryEnabled) {
     const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
     EXPECT_STREQ(
-        "[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
         ValueToString(StartupEntry).c_str());
 
     const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
-    EXPECT_STREQ("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\"}",
+    // TODO: This is a bit flaky in that the json string printer sort the entries (for now),
+    // so the "UUID" field is put at the end of the array even though it was emitted first.
+    EXPECT_STREQ(("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\",\"UUID\":\""
+                  + llvm::Twine(ExpectedUuid) + "\"}").str().c_str(),
                  ValueToString(MidpointEntry).c_str());
 
     const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
     EXPECT_STREQ(
-        "[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
         ValueToString(ExitEntry).c_str());
   }
 }
@@ -550,12 +567,12 @@ TEST(TelemetryTest, TelemetryEnabled) {
 // Similar to previous tests, but toggling the data-sanitization option ON.
 // The recorded data should have some fields removed.
 TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
-  const std::string ToolName = "TestToolOne";
+  const std::string ToolName = "TelemetryTest_SanitizedData";
 
   // Preset some test params.
   HasVendorConfig = true;
   SanitizeData = true;
-  Buffer = "";
+  Buffer.clear();
   EmittedJsons.clear();
 
   std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
@@ -574,15 +591,16 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
   // Check that the StringDestination emitted properly
   {
     // The StringDestination should have removed the odd-positioned msgs.
-    std::string ExpectedBuff = "UUID:1111\n"
-                               "MagicStartupMsg:One_TestToolOne\n"
-                               "MSG_0:Two\n"
-                               "MSG_1:\n" // was sannitized away.
-                               "MSG_2:Zwei\n"
-                               "UUID:1111\n"
-                               "MagicExitMsg:Three_TestToolOne\n";
 
-    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+    std::string ExpectedBuffer = ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MagicStartupMsg:One_" + llvm::Twine(ToolName) + "\n" +
+                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MSG_0:Two\n" +
+                                  "MSG_1:\n" + // <<< was sanitized away.
+                                  "MSG_2:Zwei\n" +
+                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+                                  "MagicExitMsg:Three_" + llvm::Twine(ToolName) + "\n").str();
+    EXPECT_STREQ(ExpectedBuffer.c_str(), Buffer.c_str());
   }
 
   // Check that the JsonDestination emitted properly
@@ -594,19 +612,21 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
     const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
     EXPECT_STREQ(
-        "[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
         ValueToString(StartupEntry).c_str());
 
     const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
     // The JsonDestination should have removed the even-positioned msgs.
-    EXPECT_STREQ("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\"}",
+    EXPECT_STREQ(("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\",\"UUID\":\""
+                  + llvm::Twine(ExpectedUuid) + "\"}").str().c_str(),
                  ValueToString(MidpointEntry).c_str());
 
+
     const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
     EXPECT_STREQ(
-        "[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
         ValueToString(ExitEntry).c_str());
   }
 }

>From 02e750ec91015fba0acaee1b3527f790417c0cb8 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Thu, 29 Aug 2024 12:47:46 -0400
Subject: [PATCH 07/10] more formatting changes

---
 llvm/unittests/Telemetry/TelemetryTest.cpp | 95 +++++++++++++---------
 1 file changed, 56 insertions(+), 39 deletions(-)

diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
index cd3c61478c0b34..fde7e31802f682 100644
--- a/llvm/unittests/Telemetry/TelemetryTest.cpp
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -167,7 +167,7 @@ struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
 
   json::Object serializeToJson() const override {
     json::Object Inner;
-    Inner.try_emplace ("UUID", SessionUuid);
+    Inner.try_emplace("UUID", SessionUuid);
     int I = 0;
     for (const std::string &M : Msgs) {
       Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
@@ -300,7 +300,8 @@ class TestTelemeter : public Telemeter {
 
   static std::unique_ptr<TestTelemeter>
   createInstance(TelemetryConfig *config) {
-    llvm::errs() << "============================== createInstance is called" << "\n";
+    llvm::errs() << "============================== createInstance is called"
+                 << "\n";
     if (!config->EnableTelemetry)
       return nullptr;
     ExpectedUuid = nextUuid();
@@ -366,7 +367,7 @@ class TestTelemeter : public Telemeter {
     emitToDestinations(Entry);
   }
 
-  const std::string& getUuid() const {return Uuid;}
+  const std::string &getUuid() const { return Uuid; }
 
   ~TestTelemeter() {
     for (auto *Dest : Destinations)
@@ -524,14 +525,13 @@ TEST(TelemetryTest, TelemetryEnabled) {
 
   // Check that the StringDestination emitted properly
   {
-    std::string ExpectedBuffer = ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MagicStartupMsg:One_" + llvm::Twine(ToolName) + "\n" +
-                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MSG_0:Two\n" +
-                                  "MSG_1:Deux\n" +
-                                  "MSG_2:Zwei\n" +
-                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MagicExitMsg:Three_" + llvm::Twine(ToolName) + "\n").str();
+    std::string ExpectedBuffer =
+        ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" + "MagicStartupMsg:One_" +
+         llvm::Twine(ToolName) + "\n" + "UUID:" + llvm::Twine(ExpectedUuid) +
+         "\n" + "MSG_0:Two\n" + "MSG_1:Deux\n" + "MSG_2:Zwei\n" +
+         "UUID:" + llvm::Twine(ExpectedUuid) + "\n" + "MagicExitMsg:Three_" +
+         llvm::Twine(ToolName) + "\n")
+            .str();
 
     EXPECT_STREQ(ExpectedBuffer.c_str(), Buffer.c_str());
   }
@@ -544,23 +544,33 @@ TEST(TelemetryTest, TelemetryEnabled) {
 
     const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
-    EXPECT_STREQ(
-        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
-        ValueToString(StartupEntry).c_str());
+    EXPECT_STREQ(("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) +
+                  "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName) +
+                  "\"]]")
+                     .str()
+                     .c_str(),
+                 ValueToString(StartupEntry).c_str());
 
     const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
-    // TODO: This is a bit flaky in that the json string printer sort the entries (for now),
-    // so the "UUID" field is put at the end of the array even though it was emitted first.
-    EXPECT_STREQ(("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\",\"UUID\":\""
-                  + llvm::Twine(ExpectedUuid) + "\"}").str().c_str(),
+    // TODO: This is a bit flaky in that the json string printer sort the
+    // entries (for now), so the "UUID" field is put at the end of the array
+    // even though it was emitted first.
+    EXPECT_STREQ(("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\","
+                  "\"UUID\":\"" +
+                  llvm::Twine(ExpectedUuid) + "\"}")
+                     .str()
+                     .c_str(),
                  ValueToString(MidpointEntry).c_str());
 
     const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
-    EXPECT_STREQ(
-        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
-        ValueToString(ExitEntry).c_str());
+    EXPECT_STREQ(("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) +
+                  "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName) +
+                  "\"]]")
+                     .str()
+                     .c_str(),
+                 ValueToString(ExitEntry).c_str());
   }
 }
 
@@ -592,14 +602,13 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
   {
     // The StringDestination should have removed the odd-positioned msgs.
 
-    std::string ExpectedBuffer = ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MagicStartupMsg:One_" + llvm::Twine(ToolName) + "\n" +
-                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MSG_0:Two\n" +
-                                  "MSG_1:\n" + // <<< was sanitized away.
-                                  "MSG_2:Zwei\n" +
-                                  "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
-                                  "MagicExitMsg:Three_" + llvm::Twine(ToolName) + "\n").str();
+    std::string ExpectedBuffer =
+        ("UUID:" + llvm::Twine(ExpectedUuid) + "\n" + "MagicStartupMsg:One_" +
+         llvm::Twine(ToolName) + "\n" + "UUID:" + llvm::Twine(ExpectedUuid) +
+         "\n" + "MSG_0:Two\n" + "MSG_1:\n" + // <<< was sanitized away.
+         "MSG_2:Zwei\n" + "UUID:" + llvm::Twine(ExpectedUuid) + "\n" +
+         "MagicExitMsg:Three_" + llvm::Twine(ToolName) + "\n")
+            .str();
     EXPECT_STREQ(ExpectedBuffer.c_str(), Buffer.c_str());
   }
 
@@ -611,23 +620,31 @@ TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
 
     const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
     ASSERT_NE(StartupEntry, nullptr);
-    EXPECT_STREQ(
-        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
-        ValueToString(StartupEntry).c_str());
+    EXPECT_STREQ(("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) +
+                  "\"],[\"MagicStartupMsg\",\"One_" + llvm::Twine(ToolName) +
+                  "\"]]")
+                     .str()
+                     .c_str(),
+                 ValueToString(StartupEntry).c_str());
 
     const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
     ASSERT_NE(MidpointEntry, nullptr);
     // The JsonDestination should have removed the even-positioned msgs.
-    EXPECT_STREQ(("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\",\"UUID\":\""
-                  + llvm::Twine(ExpectedUuid) + "\"}").str().c_str(),
-                 ValueToString(MidpointEntry).c_str());
-
+    EXPECT_STREQ(
+        ("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\",\"UUID\":\"" +
+         llvm::Twine(ExpectedUuid) + "\"}")
+            .str()
+            .c_str(),
+        ValueToString(MidpointEntry).c_str());
 
     const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
     ASSERT_NE(ExitEntry, nullptr);
-    EXPECT_STREQ(
-        ("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) + "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName)+"\"]]").str().c_str(),
-        ValueToString(ExitEntry).c_str());
+    EXPECT_STREQ(("[[\"UUID\",\"" + llvm::Twine(ExpectedUuid) +
+                  "\"],[\"MagicExitMsg\",\"Three_" + llvm::Twine(ToolName) +
+                  "\"]]")
+                     .str()
+                     .c_str(),
+                 ValueToString(ExitEntry).c_str());
   }
 }
 

>From 63e99fc09412fa6b8f23f9820eb1810dc7012f3d Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Thu, 29 Aug 2024 15:00:29 -0400
Subject: [PATCH 08/10] Added header comment to describe the package

---
 llvm/include/llvm/Telemetry/Telemetry.h | 66 ++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index 7084b81c0304ae..d4f192ab9159c5 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -5,6 +5,50 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides the basic framework for Telemetry
+///
+/// It comprises of three important structs/classes:
+///
+/// - Telemeter: The class responsible for collecting and forwarding
+///              telemery data.
+/// - TelemetryInfo: data courier
+/// - TelemetryConfig: this stores configurations on Telemeter.
+///
+/// This framework is intended to be configurable and extensible:
+///    - Any LLVM tool that wants to use Telemetry can extend/customize it.
+///    - Toolchain vendors can also provide custom implementation/config of the
+///      Telemetry library, which could either overrides or extends the given tool's
+///      upstream implementation, to best fit their organization's usage and
+///      security models.
+///    - End users of such tool can also configure telemetry (as allowed
+///      by their vendor).
+///
+/// Note: There are two important points to highlight about this package:
+///
+///  (0) There is (currently) no concrete implementation of Telemetry in
+///      upstream LLVM. We only provide the abstract API here. Any tool
+///      that wants telemetry will have to implement one.
+///
+///      The reason for this is because all the tools in llvm are
+///      very different in what they care about (what/when/where to instrument)
+///      Hence it might not be practical to a single implementation.
+///      However, if in the future when we see any common pattern, we can
+///      extract them into a shared place. That is TBD - contributions welcomed.
+///
+///  (1) No implementation of Telemetry in upstream LLVM shall directly store
+///      any of the collected data due to privacy and security reasons:
+///        + Different organizations have different opinions on which data
+///          is sensitive and which is not.
+///        + Data ownerships and data collection consents are hare to
+///          accommodate from LLVM developers' point of view.
+///          (Eg., the data collected by Telemetry framework is NOT neccessarily
+///           owned by the user of a LLVM tool with Telemetry enabled, hence
+///           their consent to data collection isn't meaningful. On the other
+///           hand, we have no practical way to request consent from "real" owners.
+//===---------------------------------------------------------------------===//
+
 
 #ifndef LLVM_TELEMETRY_TELEMETRY_H
 #define LLVM_TELEMETRY_TELEMETRY_H
@@ -23,23 +67,26 @@
 namespace llvm {
 namespace telemetry {
 
-using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
 
 struct TelemetryConfig {
   // If true, telemetry will be enabled.
   bool EnableTelemetry;
 
-  // Additional destinations to send the logged entries.
-  // Could be stdout, stderr, or some local paths.
-  // Note: these are destinations are __in addition to__ whatever the default
-  // destination(s) are, as implemented by vendors.
+  // Implementation-defined names of additional destinations to send
+  // telemetry data (Could be stdout, stderr, or some local paths, etc).
+  //
+  // These strings will be interpreted by the vendor's code.
+  // So the users must pick the  from their vendor's pre-defined
+  // set of Destinations.
   std::vector<std::string> AdditionalDestinations;
 };
 
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
 struct TelemetryEventStats {
-  // REQUIRED: Start time of event
+  // REQUIRED: Start time of an event
   SteadyTimePoint Start;
-  // OPTIONAL: End time of event - may be empty if not meaningful.
+  // OPTIONAL: End time of an event - may be empty if not meaningful.
   std::optional<SteadyTimePoint> End;
   // TBD: could add some memory stats here too?
 
@@ -60,7 +107,10 @@ struct EntryKind {
   static const KindType Base = 0;
 };
 
-// The base class contains the basic set of data.
+// TelemetryInfo is the data courier, used to forward data from
+// the tool being monitored and the Telemery framework.
+//
+// This base class contains only the basic set of telemetry data.
 // Downstream implementations can add more fields as needed.
 struct TelemetryInfo {
   // A "session" corresponds to every time the tool starts.

>From fa885126c6d7e5b0caf6e2f2411dcd034081e187 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Thu, 29 Aug 2024 15:08:18 -0400
Subject: [PATCH 09/10] reformat header

---
 llvm/include/llvm/Telemetry/Telemetry.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index d4f192ab9159c5..cbc374df632124 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -19,9 +19,9 @@
 /// This framework is intended to be configurable and extensible:
 ///    - Any LLVM tool that wants to use Telemetry can extend/customize it.
 ///    - Toolchain vendors can also provide custom implementation/config of the
-///      Telemetry library, which could either overrides or extends the given tool's
-///      upstream implementation, to best fit their organization's usage and
-///      security models.
+///      Telemetry library, which could either overrides or extends the given
+///      tool's upstream implementation, to best fit their organization's usage
+///      and security models.
 ///    - End users of such tool can also configure telemetry (as allowed
 ///      by their vendor).
 ///
@@ -41,15 +41,15 @@
 ///      any of the collected data due to privacy and security reasons:
 ///        + Different organizations have different opinions on which data
 ///          is sensitive and which is not.
-///        + Data ownerships and data collection consents are hare to
+///        + Data ownerships and data collection consents are hard to
 ///          accommodate from LLVM developers' point of view.
 ///          (Eg., the data collected by Telemetry framework is NOT neccessarily
 ///           owned by the user of a LLVM tool with Telemetry enabled, hence
 ///           their consent to data collection isn't meaningful. On the other
-///           hand, we have no practical way to request consent from "real" owners.
+///           hand, we have no practical way to request consent from "real"
+///           owners.
 //===---------------------------------------------------------------------===//
 
-
 #ifndef LLVM_TELEMETRY_TELEMETRY_H
 #define LLVM_TELEMETRY_TELEMETRY_H
 
@@ -67,7 +67,6 @@
 namespace llvm {
 namespace telemetry {
 
-
 struct TelemetryConfig {
   // If true, telemetry will be enabled.
   bool EnableTelemetry;

>From 0866f64dc2863fad28d2027a08e01ab414c6b46a Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Tue, 3 Sep 2024 13:52:20 -0400
Subject: [PATCH 10/10] addressed review comments and added separate docs in
 llvm/docs/

---
 llvm/docs/Telemetry.rst                 | 70 ++++++++++++++++++++++
 llvm/docs/UserGuides.rst                |  3 +
 llvm/include/llvm/Telemetry/Telemetry.h | 77 +++++++++++--------------
 3 files changed, 106 insertions(+), 44 deletions(-)
 create mode 100644 llvm/docs/Telemetry.rst

diff --git a/llvm/docs/Telemetry.rst b/llvm/docs/Telemetry.rst
new file mode 100644
index 00000000000000..997436e78735d0
--- /dev/null
+++ b/llvm/docs/Telemetry.rst
@@ -0,0 +1,70 @@
+===========================
+Telemetry framework in LLVM
+===========================
+
+.. contents::
+   :local:
+
+.. toctree::
+   :hidden:
+
+Objective
+=========
+
+Provides a common framework in LLVM for collecting various usage and performance
+metrics.
+It is located at `llvm/Telemetry/Telemetry.h`
+
+Characteristics
+---------------
+* Configurable and extensible by:
+  * Tools: any tool that wants to use Telemetry can extend and customize it.
+  * Vendors: Toolchain vendors can also provide custom implementation of the
+    library, which could either override or extend the given tool's upstream
+    implementation, to best fit their organization's usage and privacy models.
+  * End users of such tool can also configure Telemetry(as allowed by their
+    vendor).
+
+
+Important notes
+----------------
+
+* There is no concrete implementation of a Telemetry library in upstream LLVM.
+  We only provide the abstract API here. Any tool that wants telemetry will
+  implement one.
+  The rationale for this is that, all the tools in llvm are very different in
+  what they care about(what/where/when to instrument data). Hence, it might not
+  be practical to have a single implementation.
+  However, in the future, if we see enough common pattern, we can extract them
+  into a shared place. This is TBD - contributions are welcomed.
+
+* No implementation of Telemetry in upstream LLVM shall store any of the
+  collected data due to privacy and security reasons:
+  * Different organizations have different privacy models:
+    * Which data is sensitive, which is not?
+    * Whether it is acceptable for instrumented data to be stored anywhere?
+      (to a local file, what not?)
+  * Data ownership and data collection consents are hard to accommodate from
+    LLVM developers' point of view:
+     * Eg., data collected by Telemetry is not neccessarily owned by the user
+       of an LLVM tool with Telemetry enabled, hence the user's consent to data
+       collection is not meaningful. On the other hand, LLVM developers have no
+       practical way to request consent from the "real" owners.
+    
+    
+High-level design
+=================
+
+Key components
+--------------
+
+The framework is consisted of three important classes:
+* `llvm::telemetry::Telemeter`: The class responsible for collecting and
+   forwarding telemetry data. This is the main point of interaction between
+   the framework and any tool that wants to enable telemery.
+* `llvm::telemetry::TelemetryInfo`: Data courier
+* `llvm::telemetry::Config`: Configurations on the `Telemeter`.
+
+
+//  <TODO insert the flow chart that shows the interaction between the classes here>
+
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 86101ffbd9ca5d..171da2053e7311 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -293,3 +293,6 @@ Additional Topics
 
 :doc:`Sandbox IR <SandboxIR>`
    This document describes the design and usage of Sandbox IR, a transactional layer over LLVM IR.
+
+:doc:`Telemetry`
+   This document describes the Telemetry framework in LLVM.
diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index cbc374df632124..55a915a4fb894f 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -16,38 +16,7 @@
 /// - TelemetryInfo: data courier
 /// - TelemetryConfig: this stores configurations on Telemeter.
 ///
-/// This framework is intended to be configurable and extensible:
-///    - Any LLVM tool that wants to use Telemetry can extend/customize it.
-///    - Toolchain vendors can also provide custom implementation/config of the
-///      Telemetry library, which could either overrides or extends the given
-///      tool's upstream implementation, to best fit their organization's usage
-///      and security models.
-///    - End users of such tool can also configure telemetry (as allowed
-///      by their vendor).
-///
-/// Note: There are two important points to highlight about this package:
-///
-///  (0) There is (currently) no concrete implementation of Telemetry in
-///      upstream LLVM. We only provide the abstract API here. Any tool
-///      that wants telemetry will have to implement one.
-///
-///      The reason for this is because all the tools in llvm are
-///      very different in what they care about (what/when/where to instrument)
-///      Hence it might not be practical to a single implementation.
-///      However, if in the future when we see any common pattern, we can
-///      extract them into a shared place. That is TBD - contributions welcomed.
-///
-///  (1) No implementation of Telemetry in upstream LLVM shall directly store
-///      any of the collected data due to privacy and security reasons:
-///        + Different organizations have different opinions on which data
-///          is sensitive and which is not.
-///        + Data ownerships and data collection consents are hard to
-///          accommodate from LLVM developers' point of view.
-///          (Eg., the data collected by Telemetry framework is NOT neccessarily
-///           owned by the user of a LLVM tool with Telemetry enabled, hence
-///           their consent to data collection isn't meaningful. On the other
-///           hand, we have no practical way to request consent from "real"
-///           owners.
+/// Refer to its documentation at llvm/docs/Telemetry.rst for more details.
 //===---------------------------------------------------------------------===//
 
 #ifndef LLVM_TELEMETRY_TELEMETRY_H
@@ -67,7 +36,9 @@
 namespace llvm {
 namespace telemetry {
 
-struct TelemetryConfig {
+// Configuration for the Telemeter class.
+// This struct can be extended as needed.
+struct Config {
   // If true, telemetry will be enabled.
   bool EnableTelemetry;
 
@@ -80,9 +51,12 @@ struct TelemetryConfig {
   std::vector<std::string> AdditionalDestinations;
 };
 
+// Defines a convenient type for timestamp of various events.
+// This is used by the EventStats below.
 using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
 
-struct TelemetryEventStats {
+// Various time (and possibly memory) statistics of an event.
+struct EventStats {
   // REQUIRED: Start time of an event
   SteadyTimePoint Start;
   // OPTIONAL: End time of an event - may be empty if not meaningful.
@@ -107,22 +81,28 @@ struct EntryKind {
 };
 
 // TelemetryInfo is the data courier, used to forward data from
-// the tool being monitored and the Telemery framework.
+// the tool being monitored to the Telemery framework.
 //
 // This base class contains only the basic set of telemetry data.
 // Downstream implementations can add more fields as needed.
 struct TelemetryInfo {
-  // A "session" corresponds to every time the tool starts.
-  // All entries emitted for the same session will have
-  // the same session_uuid
-  std::string SessionUuid;
+  // This represents a unique-id, conventionally corresponding to
+  // a tools' session - ie., every time the tool starts until it exits.
+  //
+  // Note: a tool could have mutliple sessions running at once, in which
+  // case, these shall be multiple sets of TelemetryInfo with multiple unique
+  // ids.
+  //
+  // Different usages can assign different types of IDs to this field.
+  std::string SessionId;
 
+  // Time/memory statistics of this event.
   TelemetryEventStats Stats;
 
   std::optional<ExitDescription> ExitDesc;
 
   // Counting number of entries.
-  // (For each set of entries with the same session_uuid, this value should
+  // (For each set of entries with the same SessionId, this value should
   // be unique for each entry)
   size_t Counter;
 
@@ -132,20 +112,29 @@ struct TelemetryInfo {
   virtual json::Object serializeToJson() const;
 
   // For isa, dyn_cast, etc, operations.
-  virtual KindType getEntryKind() const { return EntryKind::Base; }
+  virtual KindType getKind() const { return EntryKind::Base; }
   static bool classof(const TelemetryInfo *T) {
-    return T->getEntryKind() == EntryKind::Base;
+    return T->getKind() == EntryKind::Base;
   }
 };
 
-// Where/how to send the telemetry entries.
-class TelemetryDestination {
+// This class presents a data sink to which the Telemetry framework
+// sends data.
+//
+// Its implementation is transparent to the framework.
+// It is up to the vendor to decide which pieces of data to forward
+// and where to forward them.
+class Destination {
 public:
   virtual ~TelemetryDestination() = default;
   virtual Error emitEntry(const TelemetryInfo *Entry) = 0;
   virtual std::string name() const = 0;
 };
 
+// This class is the main interaction point between any LLVM tool
+// and this framework.
+// It is responsible for collecting telemetry data from the tool being
+// monitored.
 class Telemeter {
 public:
   // Invoked upon tool startup



More information about the llvm-commits mailing list