[llvm] [JIT][X86] Respect `JITDUMP_USE_ARCH_TIMESTAMP` environment variable (PR #146085)

Cody Tapscott via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 27 09:53:38 PDT 2025


https://github.com/topolarity updated https://github.com/llvm/llvm-project/pull/146085

>From b83f2dbd8a1c5c8a49d479deab56139dc789ef1c Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity at tapscott.me>
Date: Fri, 27 Jun 2025 10:14:33 -0400
Subject: [PATCH] [JIT] Respect `JITDUMP_USE_ARCH_TIMESTAMP` environment
 variable

`perf record -e intel_pt` sets this environment variable to indicate that the
jitdump should be emitted with an arch-specific timestamp instead of
CLOCK_MONOTONIC. Using the arch-specific timestamp allows `perf inject --jit`
to correlate the dump with the Intel PT recording data.
---
 .../Orc/TargetProcess/JITLoaderPerf.cpp       | 41 +++++++++++++++----
 .../PerfJITEvents/PerfJITEventListener.cpp    | 34 ++++++++++++---
 2 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
index 1a61d3188a820..e63c1096163af 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
@@ -23,6 +23,10 @@
 #include <mutex>
 #include <optional>
 
+#if defined(__x86_64__)
+#include <x86intrin.h> // for __rdtsc()
+#endif
+
 #ifdef __linux__
 
 #include <sys/mman.h> // mmap()
@@ -38,6 +42,10 @@
    (uint32_t)'D')
 #define LLVM_PERF_JIT_VERSION 1
 
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
+// clock source
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
+
 using namespace llvm;
 using namespace llvm::orc;
 
@@ -54,6 +62,9 @@ struct PerfState {
   // output data stream
   std::unique_ptr<raw_fd_ostream> Dumpstream;
 
+  // use arch-specific timestamp instead of CLOCK_MONOTONIC
+  bool UseArchTimestamp = false;
+
   // perf mmap marker
   void *MarkerAddr = NULL;
 };
@@ -102,7 +113,15 @@ static inline uint64_t timespec_to_ns(const struct timespec *TS) {
   return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec;
 }
 
-static inline uint64_t perf_get_timestamp() {
+static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) {
+  if (use_arch_timestamp) {
+#if defined(__x86_64__)
+    return __rdtsc();
+#else
+    return 0;
+#endif
+  }
+
   timespec TS;
   if (clock_gettime(CLOCK_MONOTONIC, &TS))
     return 0;
@@ -116,7 +135,7 @@ static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
                     << DebugRecord.Entries.size() << " entries\n");
   [[maybe_unused]] size_t Written = 0;
   DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
-                    DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
+                    DebugRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
           DebugRecord.CodeAddr, DebugRecord.Entries.size()};
   State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
   Written += sizeof(Dir);
@@ -136,7 +155,7 @@ static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
                     << CodeRecord.CodeSize << " and code index "
                     << CodeRecord.CodeIndex << "\n");
   CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
-                    CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
+                    CodeRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
           State->Pid,
           Tid,
           CodeRecord.Vma,
@@ -160,7 +179,7 @@ writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
          << UnwindRecord.EHFrameHdrSize << " and mapped size "
          << UnwindRecord.MappedSize << "\n";
   UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
-                    UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
+                    UnwindRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
           UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
           UnwindRecord.MappedSize};
   LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
@@ -246,7 +265,8 @@ static Expected<Header> FillMachine(PerfState &State) {
   Hdr.Version = LLVM_PERF_JIT_VERSION;
   Hdr.TotalSize = sizeof(Hdr);
   Hdr.Pid = State.Pid;
-  Hdr.Timestamp = perf_get_timestamp();
+  Hdr.Timestamp = perf_get_timestamp(State.UseArchTimestamp);
+  Hdr.Flags = State.UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0;
 
   char Id[16];
   struct {
@@ -330,8 +350,15 @@ static Error InitDebuggingDir(PerfState &State) {
 static Error registerJITLoaderPerfStartImpl() {
   PerfState Tentative;
   Tentative.Pid = sys::Process::getProcessId();
+
+  if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) {
+    if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) {
+      Tentative.UseArchTimestamp = true;
+    }
+  }
+
   // check if clock-source is supported
-  if (!perf_get_timestamp())
+  if (!Tentative.UseArchTimestamp && !perf_get_timestamp(false))
     return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
                                    inconvertibleErrorCode());
 
@@ -385,7 +412,7 @@ static Error registerJITLoaderPerfEndImpl() {
   RecHeader Close;
   Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
   Close.TotalSize = sizeof(Close);
-  Close.Timestamp = perf_get_timestamp();
+  Close.Timestamp = perf_get_timestamp(State->UseArchTimestamp);
   State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
                            sizeof(Close));
   if (State->MarkerAddr)
diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 4d14a606b98b0..e050cf7eead1f 100644
--- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -36,6 +36,10 @@
 #include <time.h>      // clock_gettime(), time(), localtime_r() */
 #include <unistd.h>    // for read(), close()
 
+#if defined(__x86_64__)
+#include <x86intrin.h> // for __rdtsc()
+#endif
+
 using namespace llvm;
 using namespace llvm::object;
 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
@@ -100,6 +104,9 @@ class PerfJITEventListener : public JITEventListener {
   // perf mmap marker
   void *MarkerAddr = NULL;
 
+  // use arch-specific timestamp instead of CLOCK_MONOTONIC
+  bool UseArchTimestamp = false;
+
   // perf support ready
   bool SuccessfullyInitialized = false;
 
@@ -168,10 +175,18 @@ static inline uint64_t timespec_to_ns(const struct timespec *ts) {
   return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
 }
 
-static inline uint64_t perf_get_timestamp(void) {
+static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) {
   struct timespec ts;
   int ret;
 
+  if (use_arch_timestamp) {
+#if defined(__x86_64__)
+    return __rdtsc();
+#else
+    return 0;
+#endif
+  }
+
   ret = clock_gettime(CLOCK_MONOTONIC, &ts);
   if (ret)
     return 0;
@@ -181,8 +196,16 @@ static inline uint64_t perf_get_timestamp(void) {
 
 PerfJITEventListener::PerfJITEventListener()
     : Pid(sys::Process::getProcessId()) {
+
+  // check if arch-specific timestamp should be used
+  if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) {
+    if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) {
+      UseArchTimestamp = true;
+    }
+  }
+
   // check if clock-source is supported
-  if (!perf_get_timestamp()) {
+  if (!UseArchTimestamp && !perf_get_timestamp(false)) {
     errs() << "kernel does not support CLOCK_MONOTONIC\n";
     return;
   }
@@ -221,7 +244,8 @@ PerfJITEventListener::PerfJITEventListener()
   Header.Version = LLVM_PERF_JIT_VERSION;
   Header.TotalSize = sizeof(Header);
   Header.Pid = Pid;
-  Header.Timestamp = perf_get_timestamp();
+  Header.Timestamp = perf_get_timestamp(UseArchTimestamp);
+  Header.Flags = UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0;
   Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
 
   // Everything initialized, can do profiling now.
@@ -417,7 +441,7 @@ void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
   rec.Prefix.TotalSize = sizeof(rec) +        // debug record itself
                          Symbol->size() + 1 + // symbol name
                          CodeSize;            // and code
-  rec.Prefix.Timestamp = perf_get_timestamp();
+  rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp);
 
   rec.CodeSize = CodeSize;
   rec.Vma = CodeAddr;
@@ -446,7 +470,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
   LLVMPerfJitRecordDebugInfo rec;
   rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
   rec.Prefix.TotalSize = sizeof(rec); // will be increased further
-  rec.Prefix.Timestamp = perf_get_timestamp();
+  rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp);
   rec.CodeAddr = CodeAddr;
   rec.NrEntry = Lines.size();
 



More information about the llvm-commits mailing list