[llvm] [llvm]Add a simple Telemetry framework (PR #102323)

Vy Nguyen via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 19:12:46 PDT 2024


https://github.com/oontvoo updated https://github.com/llvm/llvm-project/pull/102323

>From dbb8b15edb5e63f37a66dd15e67d46ee1b4f6c1b Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 11:08:44 -0400
Subject: [PATCH 1/4] [llvm][lib]Propose a simple Telemetry framework.

Objective:
  - Provide a common framework in LLVM for collecting various usage metrics
  - Characteristics:
      + Extensible and configurable by:
          * tools in LLVM that want to use it
          * vendors in their downstream codebase
          * tools users (as allowed by vendor)

Background:
The framework was originally proposed only for LLDB, but there were quite a few requests
that it be moved to llvm/lib, given that telemetry is a common need for many tools,
not just LLDB.

See more details on the design and discussions here on the RFC: https://discourse.llvm.org/t/rfc-lldb-telemetry-metrics/64588/20?u=oontvoo
---
 llvm/include/llvm/Telemetry/Telemetry.h | 99 +++++++++++++++++++++++++
 llvm/lib/CMakeLists.txt                 |  1 +
 llvm/lib/Telemetry/CMakeLists.txt       |  6 ++
 llvm/lib/Telemetry/Telemetry.cpp        | 32 ++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 llvm/include/llvm/Telemetry/Telemetry.h
 create mode 100644 llvm/lib/Telemetry/CMakeLists.txt
 create mode 100644 llvm/lib/Telemetry/Telemetry.cpp

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
new file mode 100644
index 00000000000000..e34b228b219c10
--- /dev/null
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -0,0 +1,99 @@
+#ifndef LVM_TELEMETRY_TELEMETRY_H
+#define LVM_TELEMETRY_TELEMETRY_H
+
+#include <chrono>
+#include <ctime>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace telemetry {
+
+using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+struct TelemetryConfig {
+  // If true, telemetry will be enabled.
+  bool enable_telemetry;
+
+  // Additional destinations to send the logged entries.
+  // Could be stdout, stderr, or some local paths.
+  // Note: these are destinations are __in addition to__ whatever the default
+  // destination(s) are, as implemented by vendors.
+  std::vector<std::string> additional_destinations;
+};
+
+struct TelemetryEventStats {
+  // REQUIRED: Start time of event
+  SteadyTimePoint m_start;
+  // OPTIONAL: End time of event - may be empty if not meaningful.
+  std::optional<SteadyTimePoint> m_end;
+  // TBD: could add some memory stats here too?
+
+  TelemetryEventStats() = default;
+  TelemetryEventStats(SteadyTimePoint start) : m_start(start) {}
+  TelemetryEventStats(SteadyTimePoint start, SteadyTimePoint end)
+      : m_start(start), m_end(end) {}
+
+  std::string ToString() const;
+};
+
+struct ExitDescription {
+  int exit_code;
+  std::string description;
+
+  std::string ToString() const;
+};
+
+// The base class contains the basic set of data.
+// Downstream implementations can add more fields as needed.
+struct TelemetryInfo {
+  // A "session" corresponds to every time the tool starts.
+  // All entries emitted for the same session will have
+  // the same session_uuid
+  std::string session_uuid;
+
+  TelemetryEventStats stats;
+
+  std::optional<ExitDescription> exit_description;
+
+  // Counting number of entries.
+  // (For each set of entries with the same session_uuid, this value should
+  // be unique for each entry)
+  size_t counter;
+
+  TelemetryInfo() = default;
+  ~TelemetryInfo() = default;
+  virtual std::string ToString() const;
+};
+
+// Where/how to send the telemetry entries.
+class TelemetryDestination {
+public:
+  virtual ~TelemetryDestination() = default;
+  virtual Error EmitEntry(const TelemetryInfo *entry) = 0;
+  virtual std::string name() const = 0;
+};
+
+class Telemeter {
+public:
+  virtual ~Telemeter() = default;
+
+  // Invoked upon tool startup
+  virtual void LogStartup(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+
+  // Invoked upon tool exit.
+  virtual void LogExit(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+
+  virtual void AddDestination(TelemetryDestination *destination) = 0;
+};
+
+} // namespace telemetry
+} // namespace llvm
+
+#endif // LVM_TELEMETRY_TELEMETRY_H
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index 638c3bd6f90f53..1d2fb329226484 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(ProfileData)
 add_subdirectory(Passes)
 add_subdirectory(TargetParser)
 add_subdirectory(TextAPI)
+add_subdirectory(Telemetry)
 add_subdirectory(ToolDrivers)
 add_subdirectory(XRay)
 if (LLVM_INCLUDE_TESTS)
diff --git a/llvm/lib/Telemetry/CMakeLists.txt b/llvm/lib/Telemetry/CMakeLists.txt
new file mode 100644
index 00000000000000..8208bdadb05e94
--- /dev/null
+++ b/llvm/lib/Telemetry/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMTelemetry
+  Telemetry.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  "${LLVM_MAIN_INCLUDE_DIR}/llvm/Telemetry"
+)
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
new file mode 100644
index 00000000000000..f7100685ee2d2b
--- /dev/null
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -0,0 +1,32 @@
+#include "llvm/Telemetry/Telemetry.h"
+
+namespace llvm {
+namespace telemetry {
+
+std::string TelemetryEventStats::ToString() const {
+  std::string result;
+  llvm::raw_string_ostream os(result);
+  os << "start_timestamp: " << m_start.time_since_epoch().count()
+     << ", end_timestamp: "
+     << (m_end.has_value() ? std::to_string(m_end->time_since_epoch().count())
+                           : "<NONE>");
+  return result;
+}
+
+std::string ExitDescription::ToString() const {
+  return "exit_code: " + std::to_string(exit_code) +
+         ", description: " + description + "\n";
+}
+
+std::string TelemetryInfo::ToString() const {
+  return "[TelemetryInfo]\n" + ("  session_uuid:" + session_uuid + "\n") +
+         ("  stats: " + stats.ToString() + "\n") +
+         ("  exit_description: " +
+          (exit_description.has_value() ? exit_description->ToString()
+                                        : "<NONE>") +
+          "\n") +
+         ("  counter: " + std::to_string(counter) + "\n");
+}
+
+} // namespace telemetry
+} // namespace llvm
\ No newline at end of file

>From 6e43f67c9f0412fbf0f4a16f2be68daa40d4b6f4 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 13:21:32 -0400
Subject: [PATCH 2/4] add header

---
 llvm/include/llvm/Telemetry/Telemetry.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index e34b228b219c10..fab170c61c02ec 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -1,3 +1,15 @@
+//===- llvm/Telemetry/Telemetry.h - Telemetry -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the commom Telemetry framework.
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef LVM_TELEMETRY_TELEMETRY_H
 #define LVM_TELEMETRY_TELEMETRY_H
 

>From e25f5fcd79f84086f4fddb7288d353cf0c0858c0 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 7 Aug 2024 14:25:22 -0400
Subject: [PATCH 3/4] fixed typo

---
 llvm/include/llvm/Telemetry/Telemetry.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index fab170c61c02ec..dc24f9c3fc3fb8 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -5,13 +5,9 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// Defines the commom Telemetry framework.
-//
-//===----------------------------------------------------------------------===//
 
-#ifndef LVM_TELEMETRY_TELEMETRY_H
-#define LVM_TELEMETRY_TELEMETRY_H
+#ifndef LLVM_TELEMETRY_TELEMETRY_H
+#define LLVM_TELEMETRY_TELEMETRY_H
 
 #include <chrono>
 #include <ctime>
@@ -108,4 +104,4 @@ class Telemeter {
 } // namespace telemetry
 } // namespace llvm
 
-#endif // LVM_TELEMETRY_TELEMETRY_H
+#endif // LLVM_TELEMETRY_TELEMETRY_H

>From 0057bcf63de085a4d41184eac7d4d4fe9ba7f299 Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng at google.com>
Date: Wed, 28 Aug 2024 22:12:30 -0400
Subject: [PATCH 4/4] add tests and addressed review comments

---
 llvm/include/llvm/Telemetry/Telemetry.h    |  53 +-
 llvm/lib/Telemetry/Telemetry.cpp           |  31 +-
 llvm/unittests/CMakeLists.txt              |   1 +
 llvm/unittests/Telemetry/CMakeLists.txt    |   9 +
 llvm/unittests/Telemetry/TelemetryTest.cpp | 606 +++++++++++++++++++++
 5 files changed, 652 insertions(+), 48 deletions(-)
 create mode 100644 llvm/unittests/Telemetry/CMakeLists.txt
 create mode 100644 llvm/unittests/Telemetry/TelemetryTest.cpp

diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
index dc24f9c3fc3fb8..935b1feddbed6e 100644
--- a/llvm/include/llvm/Telemetry/Telemetry.h
+++ b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -27,35 +27,37 @@ using SteadyTimePoint = std::chrono::time_point<std::chrono::steady_clock>;
 
 struct TelemetryConfig {
   // If true, telemetry will be enabled.
-  bool enable_telemetry;
+  bool EnableTelemetry;
 
   // Additional destinations to send the logged entries.
   // Could be stdout, stderr, or some local paths.
   // Note: these are destinations are __in addition to__ whatever the default
   // destination(s) are, as implemented by vendors.
-  std::vector<std::string> additional_destinations;
+  std::vector<std::string> AdditionalDestinations;
 };
 
 struct TelemetryEventStats {
   // REQUIRED: Start time of event
-  SteadyTimePoint m_start;
+  SteadyTimePoint Start;
   // OPTIONAL: End time of event - may be empty if not meaningful.
-  std::optional<SteadyTimePoint> m_end;
+  std::optional<SteadyTimePoint> End;
   // TBD: could add some memory stats here too?
 
   TelemetryEventStats() = default;
-  TelemetryEventStats(SteadyTimePoint start) : m_start(start) {}
-  TelemetryEventStats(SteadyTimePoint start, SteadyTimePoint end)
-      : m_start(start), m_end(end) {}
-
-  std::string ToString() const;
+  TelemetryEventStats(SteadyTimePoint Start) : Start(Start) {}
+  TelemetryEventStats(SteadyTimePoint Start, SteadyTimePoint End)
+      : Start(Start), End(End) {}
 };
 
 struct ExitDescription {
-  int exit_code;
-  std::string description;
+  int ExitCode;
+  std::string Description;
+};
 
-  std::string ToString() const;
+// For isa, dyn_cast, etc operations on TelemetryInfo.
+typedef unsigned KindType;
+struct EntryKind {
+  static const KindType Base = 0;
 };
 
 // The base class contains the basic set of data.
@@ -64,41 +66,46 @@ struct TelemetryInfo {
   // A "session" corresponds to every time the tool starts.
   // All entries emitted for the same session will have
   // the same session_uuid
-  std::string session_uuid;
+  std::string SessionUuid;
 
-  TelemetryEventStats stats;
+  TelemetryEventStats Stats;
 
-  std::optional<ExitDescription> exit_description;
+  std::optional<ExitDescription> ExitDesc;
 
   // Counting number of entries.
   // (For each set of entries with the same session_uuid, this value should
   // be unique for each entry)
-  size_t counter;
+  size_t Counter;
 
   TelemetryInfo() = default;
   ~TelemetryInfo() = default;
-  virtual std::string ToString() const;
+
+  virtual json::Object serializeToJson() const;
+
+  // For isa, dyn_cast, etc, operations.
+  virtual KindType getEntryKind() const { return EntryKind::Base; }
+  static bool classof(const TelemetryInfo* T) {
+    return T->getEntryKind() == EntryKind::Base;
+  }
 };
 
 // Where/how to send the telemetry entries.
 class TelemetryDestination {
 public:
   virtual ~TelemetryDestination() = default;
-  virtual Error EmitEntry(const TelemetryInfo *entry) = 0;
+  virtual Error emitEntry(const TelemetryInfo *Entry) = 0;
   virtual std::string name() const = 0;
 };
 
 class Telemeter {
 public:
-  virtual ~Telemeter() = default;
-
   // Invoked upon tool startup
-  virtual void LogStartup(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+  virtual void logStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
 
   // Invoked upon tool exit.
-  virtual void LogExit(llvm::StringRef tool_path, TelemetryInfo *entry) = 0;
+  virtual void logExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
 
-  virtual void AddDestination(TelemetryDestination *destination) = 0;
+  virtual void addDestination(TelemetryDestination *Destination) = 0;
 };
 
 } // namespace telemetry
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
index f7100685ee2d2b..b9605c4a38ecd0 100644
--- a/llvm/lib/Telemetry/Telemetry.cpp
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -3,30 +3,11 @@
 namespace llvm {
 namespace telemetry {
 
-std::string TelemetryEventStats::ToString() const {
-  std::string result;
-  llvm::raw_string_ostream os(result);
-  os << "start_timestamp: " << m_start.time_since_epoch().count()
-     << ", end_timestamp: "
-     << (m_end.has_value() ? std::to_string(m_end->time_since_epoch().count())
-                           : "<NONE>");
-  return result;
-}
-
-std::string ExitDescription::ToString() const {
-  return "exit_code: " + std::to_string(exit_code) +
-         ", description: " + description + "\n";
-}
-
-std::string TelemetryInfo::ToString() const {
-  return "[TelemetryInfo]\n" + ("  session_uuid:" + session_uuid + "\n") +
-         ("  stats: " + stats.ToString() + "\n") +
-         ("  exit_description: " +
-          (exit_description.has_value() ? exit_description->ToString()
-                                        : "<NONE>") +
-          "\n") +
-         ("  counter: " + std::to_string(counter) + "\n");
-}
+// Default serialization of the base entry: emits only the session UUID.
+// serializeToJson() is virtual, so subclasses can override it to emit their
+// own fields.
+llvm::json::Object TelemetryInfo::serializeToJson() const {
+  return json::Object{
+      {"UUID", SessionUuid},
+  };
+}
 
 } // namespace telemetry
-} // namespace llvm
\ No newline at end of file
+} // namespace llvm
diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt
index 911ede701982f6..9d6b3999c43958 100644
--- a/llvm/unittests/CMakeLists.txt
+++ b/llvm/unittests/CMakeLists.txt
@@ -49,6 +49,7 @@ add_subdirectory(Support)
 add_subdirectory(TableGen)
 add_subdirectory(Target)
 add_subdirectory(TargetParser)
+add_subdirectory(Telemetry)
 add_subdirectory(Testing)
 add_subdirectory(TextAPI)
 add_subdirectory(Transforms)
diff --git a/llvm/unittests/Telemetry/CMakeLists.txt b/llvm/unittests/Telemetry/CMakeLists.txt
new file mode 100644
index 00000000000000..a40ae4b2f55607
--- /dev/null
+++ b/llvm/unittests/Telemetry/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Telemetry
+  Core
+  Support
+  )
+
+add_llvm_unittest(TelemetryTests
+  TelemetryTest.cpp
+  )
diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
new file mode 100644
index 00000000000000..36a4daf55e1018
--- /dev/null
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -0,0 +1,606 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+#include <cstdio>
+#include <vector>
+#include <chrono>
+#include <ctime>
+
+#include <iostream>  // TODO: remove this
+
+// Testing parameters.
+static thread_local bool HasExitError = false;
+static thread_local std::string ExitMsg = "";
+static thread_local bool HasVendorConfig = false;
+static thread_local bool SanitizeData = false;
+static thread_local std::string Buffer = "";
+static thread_local std::vector<llvm::json::Object> EmittedJsons;
+
+namespace llvm {
+namespace telemetry {
+namespace vendor_code {
+
+// Generate a unique (but deterministic) "uuid" for testing purposes.
+static std::string nextUuid() {
+  static size_t seed = 1111; // First value handed out; incremented on every call.
+  return std::to_string(seed++);
+}
+
+struct VendorEntryKind {
+   // TODO: should these avoid duplicating other vendors' kind values?
+  static const KindType VendorCommon = 0b010101000; // Bit pattern tested by VendorCommonTelemetryInfo::classof.
+  static const KindType Startup      = 0b010101001; // VendorCommon bits plus one distinguishing low bit.
+  static const KindType Exit         = 0b010101010; // VendorCommon bits plus one distinguishing low bit.
+ };
+
+
+// Demonstrates that the TelemetryInfo (data courier) struct can be extended
+// by downstream code to store additional data as needed.
+// It can also define additional data serialization methods.
+struct VendorCommonTelemetryInfo : public TelemetryInfo {
+
+  static bool classof(const TelemetryInfo* T) {
+    // Subclasses are also accepted: any kind with all VendorCommon bits set matches.
+    return (T->getEntryKind() & VendorEntryKind::VendorCommon)  == VendorEntryKind::VendorCommon;
+
+  }
+
+  KindType getEntryKind() const override { return VendorEntryKind::VendorCommon;}
+
+  virtual void serializeToStream(llvm::raw_ostream &OS) const = 0; // Text serialization; implemented by each concrete entry.
+};
+
+
+// Entry emitted at tool startup; adds a vendor-specific message to the
+// common fields inherited from TelemetryInfo.
+struct StartupEvent : public VendorCommonTelemetryInfo {
+  std::string MagicStartupMsg;
+
+  StartupEvent() = default;
+  // A memberwise copy is all that is needed. Defaulting it (rather than
+  // copying field by field) keeps the copy complete when fields are added to
+  // this struct or to its bases.
+  StartupEvent(const StartupEvent &E) = default;
+
+  static bool classof(const TelemetryInfo *T) {
+    return T->getEntryKind() == VendorEntryKind::Startup;
+  }
+
+  KindType getEntryKind() const override { return VendorEntryKind::Startup; }
+
+  // Writes the entry as "Key:Value" lines.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "UUID:" << SessionUuid << "\n";
+    OS << "MagicStartupMsg:" << MagicStartupMsg << "\n";
+  }
+
+  // Places the entry's fields under a top-level "Startup" key.
+  json::Object serializeToJson() const override {
+    return json::Object{
+        {"Startup",
+         {{"UUID", SessionUuid}, {"MagicStartupMsg", MagicStartupMsg}}},
+    };
+  }
+};
+
+// Entry emitted at tool exit; adds a vendor-specific message to the common
+// fields inherited from TelemetryInfo.
+struct ExitEvent : public VendorCommonTelemetryInfo {
+  std::string MagicExitMsg;
+
+  ExitEvent() = default;
+  // Entries must be copyable because a destination may need to make a copy
+  // before sanitizing the Entry. The defaulted copy stays complete when
+  // fields are added to this struct or to its bases.
+  ExitEvent(const ExitEvent &E) = default;
+
+  static bool classof(const TelemetryInfo *T) {
+    return T->getEntryKind() == VendorEntryKind::Exit;
+  }
+
+  // Declared with KindType (not a bare unsigned) so the override keeps
+  // matching the base declaration if the KindType alias ever changes.
+  KindType getEntryKind() const override { return VendorEntryKind::Exit; }
+
+  // Writes the entry as "Key:Value" lines; ExitCode is written only when an
+  // exit description is present.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    OS << "UUID:" << SessionUuid << "\n";
+    if (ExitDesc.has_value())
+      OS << "ExitCode:" << ExitDesc->ExitCode << "\n";
+    OS << "MagicExitMsg:" << MagicExitMsg << "\n";
+  }
+
+  // Places the entry's fields under a top-level "Exit" key; ExitCode is
+  // appended only when an exit description is present.
+  json::Object serializeToJson() const override {
+    json::Array I = json::Array{
+        {"UUID", SessionUuid},
+        {"MagicExitMsg", MagicExitMsg},
+    };
+    if (ExitDesc.has_value())
+      I.push_back(json::Value({"ExitCode", ExitDesc->ExitCode}));
+    return json::Object{
+        {"Exit", std::move(I)},
+    };
+  }
+};
+
+// Entry for a vendor-defined "midpoint" event that carries a list of messages.
+struct CustomTelemetryEvent : public VendorCommonTelemetryInfo {
+  // Kind value for this entry type. It keeps every VendorCommon bit set, so
+  // the entry is still accepted by VendorCommonTelemetryInfo::classof, and it
+  // differs from the Startup and Exit kinds.
+  static const KindType Kind = VendorEntryKind::VendorCommon | 0b100;
+
+  std::vector<std::string> Msgs;
+
+  CustomTelemetryEvent() = default;
+  // Defaulted so the copy stays complete when fields are added to this struct
+  // or to its bases.
+  CustomTelemetryEvent(const CustomTelemetryEvent &E) = default;
+
+  // Without a dedicated classof, isa<>/dyn_cast<> would fall back to the
+  // inherited VendorCommonTelemetryInfo::classof, which also accepts
+  // StartupEvent and ExitEvent, and a dyn_cast to this type would then
+  // downcast an object of the wrong type.
+  static bool classof(const TelemetryInfo *T) {
+    return T->getEntryKind() == Kind;
+  }
+
+  KindType getEntryKind() const override { return Kind; }
+
+  // Writes one "MSG_<index>:<message>" line per message.
+  void serializeToStream(llvm::raw_ostream &OS) const override {
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      OS << "MSG_" << I << ":" << M << "\n";
+      ++I;
+    }
+  }
+
+  // Places the messages, keyed "MSG_<index>", under a top-level "Midpoint"
+  // key.
+  json::Object serializeToJson() const override {
+    json::Object Inner;
+    int I = 0;
+    for (const std::string &M : Msgs) {
+      Inner.try_emplace(("MSG_" + llvm::Twine(I)).str(), M);
+      ++I;
+    }
+    return json::Object{{"Midpoint", std::move(Inner)}};
+  }
+};
+
+
+// The following classes demonstrate how downstream code can
+// define one or more custom TelemetryDestination(s) to handle
+// Telemetry data differently, specifically:
+//    + which data to send (full set or sanitized)
+//    + where to send the data
+//    + in what form
+
+const std::string STRING_DEST( "STRING"); // Name returned by StringDestination::name().
+const std::string JSON_DEST ("JSON"); // Name returned by JsonStreamDestination::name().
+
+// This Destination sends data to a std::string given at ctor.
+class StringDestination : public TelemetryDestination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  // Buf: the string that receives the serialized entries. The output stream
+  // is constructed over it, so it should outlive this object.
+  StringDestination(bool ShouldSanitize, std::string &Buf)
+      : ShouldSanitize(ShouldSanitize), OS(Buf) {}
+
+  // Serializes the entry to the string as text. Always returns success.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    // A single dyn_cast both tests and converts; a separate isa<> check in
+    // front of it is redundant.
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    if (!E) {
+      // Unfamiliar entries, just send the entry's UUID
+      OS << "UUID:" << Entry->SessionUuid << "\n";
+      return Error::success();
+    }
+
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E)) {
+      // Either sanitizing is off, or there is nothing to sanitize for this
+      // type of data, so keep as-is.
+      E->serializeToStream(OS);
+    } else if (const auto *C = dyn_cast<CustomTelemetryEvent>(E)) {
+      sanitizeFields(C).serializeToStream(OS);
+    } else {
+      llvm_unreachable("unexpected type");
+    }
+    return Error::success();
+  }
+
+  std::string name() const override { return STRING_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at ODD positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 1; I < Sanitized.Msgs.size(); I += 2)
+      Sanitized.Msgs[I] = "";
+    return Sanitized;
+  }
+
+  bool ShouldSanitize;
+  llvm::raw_string_ostream OS;
+};
+
+// This Destination sends data to some "blackbox" in form of JSON.
+class JsonStreamDestination : public TelemetryDestination {
+public:
+  // ShouldSanitize: if true, sanitize the data before emitting, otherwise, emit
+  // the full set.
+  JsonStreamDestination(bool ShouldSanitize)
+      : ShouldSanitize(ShouldSanitize) {}
+
+  // Serializes the entry to JSON and hands it to SendToBlackbox(). Every path
+  // either returns that call's result or is unreachable, so no fallback error
+  // is needed at the end.
+  Error emitEntry(const TelemetryInfo *Entry) override {
+    const auto *E = dyn_cast<VendorCommonTelemetryInfo>(Entry);
+    // Unfamiliar entries, just send the entry's UUID
+    if (!E)
+      return SendToBlackbox(json::Object{{"UUID", Entry->SessionUuid}});
+
+    // Either sanitizing is off, or there is nothing to sanitize for this type
+    // of data, so keep as-is.
+    if (!ShouldSanitize || isa<StartupEvent>(E) || isa<ExitEvent>(E))
+      return SendToBlackbox(E->serializeToJson());
+
+    if (const auto *C = dyn_cast<CustomTelemetryEvent>(E))
+      return SendToBlackbox(sanitizeFields(C).serializeToJson());
+
+    llvm_unreachable("unexpected type");
+  }
+
+  std::string name() const override { return JSON_DEST; }
+
+private:
+  // Returns a copy of the given entry, but with some fields sanitized.
+  CustomTelemetryEvent sanitizeFields(const CustomTelemetryEvent *Entry) {
+    CustomTelemetryEvent Sanitized(*Entry);
+    // Pretend that messages stored at EVEN positions are "sensitive",
+    // hence need to be sanitized away.
+    for (size_t I = 0; I < Sanitized.Msgs.size(); I += 2)
+      Sanitized.Msgs[I] = "";
+    return Sanitized;
+  }
+
+  llvm::Error SendToBlackbox(json::Object O) {
+    // Here is where the vendor-defined Destination class can
+    // send the data to some internal storage.
+    // For testing purposes, we just queue up the entries to
+    // the vector for validation.
+    EmittedJsons.push_back(std::move(O));
+    return Error::success();
+  }
+
+  bool ShouldSanitize;
+};
+
+// Custom vendor-defined Telemeter that has additional data-collection point.
+//
+// The Telemeter takes ownership of every destination handed to
+// addDestination() and destroys it when the Telemeter itself is destroyed.
+class TestTelemeter : public Telemeter {
+public:
+  TestTelemeter(std::string SessionUuid) : Uuid(SessionUuid), Counter(0) {}
+
+  // Creates a Telemeter for the given config, or returns nullptr when
+  // telemetry is disabled. One destination is created for each name listed in
+  // config->AdditionalDestinations.
+  static std::unique_ptr<TestTelemeter> createInstance(TelemetryConfig *config) {
+    if (!config->EnableTelemetry)
+      return nullptr;
+
+    auto Instance = std::make_unique<TestTelemeter>(nextUuid());
+    // Set up Destination based on the given config.
+    for (const std::string &Dest : config->AdditionalDestinations) {
+      // The destination(s) are ALSO defined by vendor, so it should understand
+      // what the name of each destination signifies.
+      if (Dest == JSON_DEST) {
+        Instance->addDestination(
+            new vendor_code::JsonStreamDestination(SanitizeData));
+      } else if (Dest == STRING_DEST) {
+        Instance->addDestination(
+            new vendor_code::StringDestination(SanitizeData, Buffer));
+      } else {
+        llvm_unreachable(
+            llvm::Twine("unknown destination: ", Dest).str().c_str());
+      }
+    }
+    return Instance;
+  }
+
+  // Records the tool name, fills in the vendor-specific startup message (for
+  // StartupEvent entries) and emits the entry to every destination.
+  void logStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    ToolName = ToolPath.str();
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *S = dyn_cast<StartupEvent>(Entry))
+      S->MagicStartupMsg = llvm::Twine("One_", ToolPath).str();
+
+    emitToDestinations(Entry);
+  }
+
+  // Fills in the vendor-specific exit message (for ExitEvent entries) and
+  // emits the entry to every destination.
+  void logExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) override {
+    // Ensure we're shutting down the same tool we started with.
+    if (ToolPath != ToolName) {
+      const std::string Msg = ("Expected tool with name " +
+                               llvm::Twine(ToolName) + ", but got " + ToolPath)
+                                  .str();
+      llvm_unreachable(Msg.c_str());
+    }
+
+    // The vendor can add additional stuff to the entry before logging.
+    if (auto *E = dyn_cast<ExitEvent>(Entry))
+      E->MagicExitMsg = llvm::Twine("Three_", ToolPath).str();
+
+    emitToDestinations(Entry);
+  }
+
+  // Takes ownership of Dest.
+  void addDestination(TelemetryDestination *Dest) override {
+    Destinations.push_back(std::unique_ptr<TelemetryDestination>(Dest));
+  }
+
+  // Additional, vendor-specific data-collection point: appends the midpoint
+  // messages (for CustomTelemetryEvent entries) and emits the entry.
+  void logMidpoint(TelemetryInfo *Entry) {
+    // The custom Telemeter can record and send additional data.
+    if (auto *C = dyn_cast<CustomTelemetryEvent>(Entry)) {
+      C->Msgs.push_back("Two");
+      C->Msgs.push_back("Deux");
+      C->Msgs.push_back("Zwei");
+    }
+
+    emitToDestinations(Entry);
+  }
+
+  // Returns a default-constructed entry of type T, stamped with this
+  // session's UUID and the next counter value.
+  template <typename T>
+  T makeDefaultTelemetryInfo() {
+    T Ret;
+    Ret.SessionUuid = Uuid;
+    Ret.Counter = Counter++;
+    return Ret;
+  }
+
+private:
+  // Sends the entry to every registered destination. Emission is best
+  // effort: a failing destination does not stop the remaining ones.
+  void emitToDestinations(TelemetryInfo *Entry) {
+    for (const auto &Dest : Destinations) {
+      // A failing llvm::Error must be handled explicitly; testing it with
+      // `if` alone is not enough, and an unhandled Error aborts the program
+      // in builds with assertions enabled.
+      if (llvm::Error Err = Dest->emitEntry(Entry))
+        consumeError(std::move(Err));
+    }
+  }
+
+  const std::string Uuid;
+  size_t Counter;
+  std::string ToolName;
+  // Owning pointers: destinations are released automatically, and the class
+  // can no longer be copied by accident (which would free them twice).
+  std::vector<std::unique_ptr<TelemetryDestination>> Destinations;
+};
+
+// Stands in for a "weakly" defined vendor-specific function; this one enables telemetry.
+void ApplyVendorSpecificConfigs(TelemetryConfig *config) {
+  config->EnableTelemetry = true;
+}
+
+} // namespace vendor_code
+} // namespace telemetry
+} // namespace llvm
+
+namespace {
+
+void ApplyCommonConfig(llvm::telemetry::TelemetryConfig* config) {
+  // Any configuration that is shareable for the upstream tool can go here.
+  // (Nothing is configured in this test.)
+}
+
+std::shared_ptr<llvm::telemetry::TelemetryConfig> GetTelemetryConfig() {
+  // Telemetry is disabled by default.
+  // The vendor can enable it in their config.
+  auto Config = std::make_shared<llvm::telemetry::TelemetryConfig>();
+  Config->EnableTelemetry = false;
+
+  ApplyCommonConfig(Config.get());
+
+  // Apply the vendor-specific config, if present.
+  // In practice, this would be a build-time param.
+  // E.g.:
+  //
+  // #ifdef HAS_VENDOR_TELEMETRY_CONFIG
+  //     llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Config.get());
+  // #endif
+  // But for unit testing, we use the testing params defined at the top.
+  if (HasVendorConfig) {
+    llvm::telemetry::vendor_code::ApplyVendorSpecificConfigs(Config.get());
+  }
+  return Config;
+}
+
+using namespace llvm;
+using namespace llvm::telemetry;
+
+// For deterministic tests, predefine certain important time points
+// rather than using now().
+//
+// Preset StartTime to EPOCH.
+auto StartTime = std::chrono::time_point<std::chrono::steady_clock>{};
+// Pretend the tool's initialization completes at EPOCH + 5 milliseconds.
+auto InitCompleteTime = StartTime + std::chrono::milliseconds(5);
+auto MidPointTime = StartTime + std::chrono::milliseconds(10); // EPOCH + 10 milliseconds.
+auto MidPointCompleteTime = MidPointTime + std::chrono::milliseconds(5); // EPOCH + 15 milliseconds.
+// Preset ExitTime to EPOCH + 20 milliseconds
+auto ExitTime = StartTime + std::chrono::milliseconds(20);
+// Pretend the time it takes to complete tearing down the tool is 10 milliseconds.
+auto ExitCompleteTime = ExitTime + std::chrono::milliseconds(10);
+
+void AtToolStart(std::string ToolName, vendor_code::TestTelemeter* T) { // Builds a StartupEvent and logs it via T.
+  vendor_code::StartupEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::StartupEvent>();
+  Entry.Stats = {StartTime, InitCompleteTime}; // Preset time points, not now().
+  T->logStartup(ToolName, &Entry);
+}
+
+void AtToolExit(std::string ToolName, vendor_code::TestTelemeter* T) { // Builds an ExitEvent and logs it via T.
+  vendor_code::ExitEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::ExitEvent>();
+  Entry.Stats = {ExitTime, ExitCompleteTime}; // Preset time points, not now().
+
+  if (HasExitError) { // Testing parameter defined at the top of the file.
+    Entry.ExitDesc = {1, ExitMsg}; // Exit code 1 plus the test-supplied message.
+  }
+  T->logExit(ToolName, &Entry);
+}
+
+void AtToolMidPoint (vendor_code::TestTelemeter* T) { // Builds a CustomTelemetryEvent and logs it via T.
+  vendor_code::CustomTelemetryEvent Entry = T->makeDefaultTelemetryInfo<vendor_code::CustomTelemetryEvent>();
+  Entry.Stats = {MidPointTime, MidPointCompleteTime}; // Preset time points, not now().
+  T->logMidpoint(&Entry);
+}
+
+// Helper function that prints the given JSON value into a string and returns it.
+static std::string ValueToString(const json::Value* V) {
+  std::string Ret;
+  llvm::raw_string_ostream P(Ret); // Stream that writes into Ret.
+  P << *V;
+  return Ret;
+}
+
+// Without a vendor's implementation, telemetry is not enabled by default.
+TEST(TelemetryTest, TelemetryDefault) {
+  HasVendorConfig = false;
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+
+  EXPECT_EQ(nullptr, Tool.get()); // createInstance returns null when telemetry is disabled.
+}
+
+// With the vendor config applied, telemetry is enabled and every event is
+// delivered, unsanitized, to both the string and the JSON destination.
+TEST(TelemetryTest, TelemetryEnabled) {
+  const std::string ToolName = "TestToolOne";
+
+  // Preset some test params.
+  HasVendorConfig = true;
+  SanitizeData = false;
+  Buffer = "";
+  EmittedJsons.clear();
+
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config = GetTelemetryConfig();
+
+  // Add some destinations
+  Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
+  Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST);
+
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+
+  AtToolStart(ToolName, Tool.get());
+  AtToolMidPoint(Tool.get());
+  AtToolExit(ToolName, Tool.get());
+
+  // Check that the StringDestination emitted properly
+  {
+    std::string ExpectedBuff = "UUID:1111\n"
+                               "MagicStartupMsg:One_TestToolOne\n"
+                               "MSG_0:Two\n"
+                               "MSG_1:Deux\n"
+                               "MSG_2:Zwei\n"
+                               "UUID:1111\n"
+                               "MagicExitMsg:Three_TestToolOne\n";
+
+    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+  }
+
+  // Check that the JsonDestination emitted properly
+  {
+    // There should be 3 events emitted by the Telemeter (start, midpoint, exit).
+    // This is a fatal assertion because the entries are indexed right below;
+    // the unsigned literal matches the type returned by size().
+    ASSERT_EQ(3u, EmittedJsons.size());
+
+    const json::Value *StartupEntry = EmittedJsons[0].get("Startup");
+    ASSERT_NE(StartupEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+                 ValueToString(StartupEntry).c_str());
+
+    const json::Value *MidpointEntry = EmittedJsons[1].get("Midpoint");
+    ASSERT_NE(MidpointEntry, nullptr);
+    EXPECT_STREQ("{\"MSG_0\":\"Two\",\"MSG_1\":\"Deux\",\"MSG_2\":\"Zwei\"}",
+                 ValueToString(MidpointEntry).c_str());
+
+    const json::Value *ExitEntry = EmittedJsons[2].get("Exit");
+    ASSERT_NE(ExitEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+                 ValueToString(ExitEntry).c_str());
+  }
+}
+
+// Similar to the previous test, but with the data-sanitization option toggled
+// ON. The recorded data should have some fields blanked out, and each
+// destination blanks a different subset of the midpoint messages.
+TEST(TelemetryTest, TelemetryEnabledSanitizeData) {
+  const std::string ToolName = "TestToolOne";
+
+  // Preset some test params.
+  HasVendorConfig = true;
+  SanitizeData = true;
+  Buffer = "";
+  EmittedJsons.clear();
+
+  std::shared_ptr<llvm::telemetry::TelemetryConfig> Config =
+      GetTelemetryConfig();
+
+  // Add some destinations
+  Config->AdditionalDestinations.push_back(vendor_code::STRING_DEST);
+  Config->AdditionalDestinations.push_back(vendor_code::JSON_DEST);
+
+  auto Tool = vendor_code::TestTelemeter::createInstance(Config.get());
+  // The helpers below dereference the telemeter unconditionally, so abort the
+  // test (instead of crashing) if no instance was created.
+  ASSERT_NE(Tool.get(), nullptr);
+
+  AtToolStart(ToolName, Tool.get());
+  AtToolMidPoint(Tool.get());
+  AtToolExit(ToolName, Tool.get());
+
+  // Check that the StringDestination emitted properly
+  {
+    // The StringDestination should have removed the odd-positioned msgs.
+    std::string ExpectedBuff = "UUID:1111\n"
+                               "MagicStartupMsg:One_TestToolOne\n"
+                               "MSG_0:Two\n"
+                               "MSG_1:\n"    // was sanitized away.
+                               "MSG_2:Zwei\n"
+                               "UUID:1111\n"
+                               "MagicExitMsg:Three_TestToolOne\n";
+
+    EXPECT_STREQ(ExpectedBuff.c_str(), Buffer.c_str());
+  }
+
+  // Check that the JsonDestination emitted properly
+  {
+    // There should be 3 events emitted by the Telemeter (start, midpoint, exit).
+    // This must be fatal: the entries are indexed directly right below.
+    ASSERT_EQ(3u, EmittedJsons.size());
+
+    const json::Value* StartupEntry = EmittedJsons[0].get("Startup");
+    ASSERT_NE(StartupEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicStartupMsg\",\"One_TestToolOne\"]]",
+                 ValueToString(StartupEntry).c_str());
+
+    const json::Value* MidpointEntry = EmittedJsons[1].get("Midpoint");
+    ASSERT_NE(MidpointEntry, nullptr);
+    // The JsonDestination should have removed the even-positioned msgs.
+    EXPECT_STREQ("{\"MSG_0\":\"\",\"MSG_1\":\"Deux\",\"MSG_2\":\"\"}",
+                 ValueToString(MidpointEntry).c_str());
+
+    const json::Value* ExitEntry = EmittedJsons[2].get("Exit");
+    ASSERT_NE(ExitEntry, nullptr);
+    EXPECT_STREQ("[[\"UUID\",\"1111\"],[\"MagicExitMsg\",\"Three_TestToolOne\"]]",
+                 ValueToString(ExitEntry).c_str());
+  }
+}
+
+} // namespace



More information about the llvm-commits mailing list