[lld] [LLD][COFF] Add support for `--time-trace` (PR #68236)

Alexandre Ganea via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 07:05:14 PDT 2023


https://github.com/aganea updated https://github.com/llvm/llvm-project/pull/68236

>From 6358289431af0bc379896f35e896d3f5e85567a3 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Wed, 4 Oct 2023 13:16:54 -0400
Subject: [PATCH 1/3] [LLD][COFF] Add support for `--time-trace`

Also add the necessary time scopes, so that the profile trace shows in great detail which tasks are executed.
---
 lld/COFF/Config.h                             |   2 +
 lld/COFF/DebugTypes.cpp                       |   3 +
 lld/COFF/Driver.cpp                           | 198 +++++++++++-------
 lld/COFF/DriverUtils.cpp                      |   2 +
 lld/COFF/ICF.cpp                              |   2 +
 lld/COFF/LLDMapFile.cpp                       |   2 +
 lld/COFF/LTO.cpp                              |   2 +
 lld/COFF/MapFile.cpp                          |   2 +
 lld/COFF/MarkLive.cpp                         |   2 +
 lld/COFF/MinGW.cpp                            |   2 +
 lld/COFF/Options.td                           |   8 +
 lld/COFF/PDB.cpp                              |  91 +++++---
 lld/COFF/SymbolTable.cpp                      |   2 +
 lld/COFF/Writer.cpp                           | 125 +++++++----
 lld/Common/Filesystem.cpp                     |   2 +
 lld/docs/ReleaseNotes.rst                     |   3 +
 lld/test/COFF/time-trace.s                    |  44 ++++
 llvm/lib/DebugInfo/MSF/MSFBuilder.cpp         |   5 +
 .../DebugInfo/PDB/Native/DbiStreamBuilder.cpp |   2 +
 .../DebugInfo/PDB/Native/GSIStreamBuilder.cpp |   2 +
 .../PDB/Native/InfoStreamBuilder.cpp          |   2 +
 .../DebugInfo/PDB/Native/PDBFileBuilder.cpp   |  26 ++-
 .../PDB/Native/PDBStringTableBuilder.cpp      |   2 +
 .../DebugInfo/PDB/Native/TpiStreamBuilder.cpp |   2 +
 llvm/lib/Support/FileOutputBuffer.cpp         |   3 +
 25 files changed, 373 insertions(+), 163 deletions(-)
 create mode 100644 lld/test/COFF/time-trace.s

diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index da31818be23dc1e..2d5d4d63d69fa4a 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -286,6 +286,7 @@ struct Configuration {
   uint32_t minorSubsystemVersion = 0;
   uint32_t timestamp = 0;
   uint32_t functionPadMin = 0;
+  uint32_t timeTraceGranularity = 0;
   bool dynamicBase = true;
   bool allowBind = true;
   bool cetCompat = false;
@@ -309,6 +310,7 @@ struct Configuration {
   bool swaprunNet = false;
   bool thinLTOEmitImportsFiles;
   bool thinLTOIndexOnly;
+  bool timeTraceEnabled = false;
   bool autoImport = false;
   bool pseudoRelocs = false;
   bool stdcallFixup = false;
diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index 23ddccea695f884..5071b7b79d23e3f 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -1068,6 +1069,7 @@ TypeMerger::~TypeMerger() = default;
 void TypeMerger::mergeTypesWithGHash() {
   // Load ghashes. Do type servers and PCH objects first.
   {
+    llvm::TimeTraceScope timeScope("Load GHASHes");
     ScopedTimer t1(ctx.loadGHashTimer);
     parallelForEach(dependencySources,
                     [&](TpiSource *source) { source->loadGHashes(); });
@@ -1075,6 +1077,7 @@ void TypeMerger::mergeTypesWithGHash() {
                     [&](TpiSource *source) { source->loadGHashes(); });
   }
 
+  llvm::TimeTraceScope timeScope("Merge types (GHASH)");
   ScopedTimer t2(ctx.mergeGHashTimer);
   GHashState ghashState;
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 554db5969ea13dd..c6a7be14017fe7f 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -247,6 +247,7 @@ void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) {
       createFutureForFile(std::string(path)));
   std::string pathStr = std::string(path);
   enqueueTask([=]() {
+    llvm::TimeTraceScope timeScope("File: ", path);
     auto [mb, ec] = future->get();
     if (ec) {
       // Retry reading the file (synchronously) now that we may have added
@@ -332,6 +333,7 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
       reportBufferError(mbOrErr.takeError(), check(c.getFullName()));
     MemoryBufferRef mb = mbOrErr.get();
     enqueueTask([=]() {
+      llvm::TimeTraceScope timeScope("Archive: ", mb.getBufferIdentifier());
       ctx.driver.addArchiveBuffer(mb, toCOFFString(ctx, sym), parentName,
                                   offsetInArchive);
     });
@@ -348,6 +350,8 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
     auto mbOrErr = future->get();
     if (mbOrErr.second)
       reportBufferError(errorCodeToError(mbOrErr.second), childName);
+    llvm::TimeTraceScope timeScope("Archive: ",
+                                   mbOrErr.first->getBufferIdentifier());
     // Pass empty string as archive name so that the original filename is
     // used as the buffer identifier.
     ctx.driver.addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)),
@@ -974,6 +978,7 @@ std::string LinkerDriver::getImportName(bool asLib) {
 }
 
 void LinkerDriver::createImportLibrary(bool asLib) {
+  llvm::TimeTraceScope timeScope("Create import library");
   std::vector<COFFShortExport> exports;
   for (Export &e1 : ctx.config.exports) {
     COFFShortExport e2;
@@ -1031,6 +1036,7 @@ void LinkerDriver::createImportLibrary(bool asLib) {
 }
 
 void LinkerDriver::parseModuleDefs(StringRef path) {
+  llvm::TimeTraceScope timeScope("Parse def file");
   std::unique_ptr<MemoryBuffer> mb =
       CHECK(MemoryBuffer::getFile(path, /*IsText=*/false,
                                   /*RequiresNullTerminator=*/false,
@@ -1095,6 +1101,7 @@ void LinkerDriver::enqueueTask(std::function<void()> task) {
 }
 
 bool LinkerDriver::run() {
+  llvm::TimeTraceScope timeScope("Read input files");
   ScopedTimer t(ctx.inputFileTimer);
 
   bool didWork = !taskQueue.empty();
@@ -1233,6 +1240,8 @@ static void markAddrsig(Symbol *s) {
 }
 
 static void findKeepUniqueSections(COFFLinkerContext &ctx) {
+  llvm::TimeTraceScope timeScope("Find keep unique sections");
+
   // Exported symbols could be address-significant in other executables or DSOs,
   // so we conservatively mark them as address-significant.
   for (Export &r : ctx.config.exports)
@@ -1325,6 +1334,7 @@ void LinkerDriver::parsePDBAltPath() {
 /// trees into one resource tree.
 /// Call after ObjFile::Instances is complete.
 void LinkerDriver::convertResources() {
+  llvm::TimeTraceScope timeScope("Convert resources");
   std::vector<ObjFile *> resourceObjFiles;
 
   for (ObjFile *f : ctx.objFileInstances) {
@@ -1478,6 +1488,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   ArgParser parser(ctx);
   opt::InputArgList args = parser.parse(argsArr);
 
+  // Initialize time trace profiler.
+  config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
+  config->timeTraceGranularity =
+      args::getInteger(args, OPT_time_trace_granularity_eq, 500);
+
+  if (config->timeTraceEnabled)
+    timeTraceProfilerInitialize(config->timeTraceGranularity, argsArr[0]);
+
+  llvm::TimeTraceScope timeScope("COFF link");
+
   // Parse and evaluate -mllvm options.
   std::vector<const char *> v;
   v.push_back("lld-link (LLVM option parsing)");
@@ -2127,6 +2147,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     parseFunctionPadMin(arg);
 
   if (tar) {
+    llvm::TimeTraceScope timeScope("Reproducer: response file");
     tar->append("response.txt",
                 createResponseFile(args, filePaths,
                                    ArrayRef<StringRef>(searchPaths).slice(1)));
@@ -2147,15 +2168,18 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
           machineToStr(config->machine));
 
   // Handle /export
-  for (auto *arg : args.filtered(OPT_export)) {
-    Export e = parseExport(arg->getValue());
-    if (config->machine == I386) {
-      if (!isDecorated(e.name))
-        e.name = saver().save("_" + e.name);
-      if (!e.extName.empty() && !isDecorated(e.extName))
-        e.extName = saver().save("_" + e.extName);
+  {
+    llvm::TimeTraceScope timeScope("Parse /export");
+    for (auto *arg : args.filtered(OPT_export)) {
+      Export e = parseExport(arg->getValue());
+      if (config->machine == I386) {
+        if (!isDecorated(e.name))
+          e.name = saver().save("_" + e.name);
+        if (!e.extName.empty() && !isDecorated(e.extName))
+          e.extName = saver().save("_" + e.extName);
+      }
+      config->exports.push_back(e);
     }
-    config->exports.push_back(e);
   }
 
   // Handle /def
@@ -2176,40 +2200,47 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // that from entry point name.  Must happen before /entry handling,
   // and after the early return when just writing an import library.
   if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
+    llvm::TimeTraceScope timeScope("Infer subsystem");
     config->subsystem = inferSubsystem();
     if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
       fatal("subsystem must be defined");
   }
 
   // Handle /entry and /dll
-  if (auto *arg = args.getLastArg(OPT_entry)) {
-    config->entry = addUndefined(mangle(arg->getValue()));
-  } else if (!config->entry && !config->noEntry) {
-    if (args.hasArg(OPT_dll)) {
-      StringRef s = (config->machine == I386) ? "__DllMainCRTStartup at 12"
-                                              : "_DllMainCRTStartup";
-      config->entry = addUndefined(s);
-    } else if (config->driverWdm) {
-      // /driver:wdm implies /entry:_NtProcessStartup
-      config->entry = addUndefined(mangle("_NtProcessStartup"));
-    } else {
-      // Windows specific -- If entry point name is not given, we need to
-      // infer that from user-defined entry name.
-      StringRef s = findDefaultEntry();
-      if (s.empty())
-        fatal("entry point must be defined");
-      config->entry = addUndefined(s);
-      log("Entry name inferred: " + s);
+  {
+    llvm::TimeTraceScope timeScope("Entry point");
+    if (auto *arg = args.getLastArg(OPT_entry)) {
+      config->entry = addUndefined(mangle(arg->getValue()));
+    } else if (!config->entry && !config->noEntry) {
+      if (args.hasArg(OPT_dll)) {
+        StringRef s = (config->machine == I386) ? "__DllMainCRTStartup at 12"
+                                                : "_DllMainCRTStartup";
+        config->entry = addUndefined(s);
+      } else if (config->driverWdm) {
+        // /driver:wdm implies /entry:_NtProcessStartup
+        config->entry = addUndefined(mangle("_NtProcessStartup"));
+      } else {
+        // Windows specific -- If entry point name is not given, we need to
+        // infer that from user-defined entry name.
+        StringRef s = findDefaultEntry();
+        if (s.empty())
+          fatal("entry point must be defined");
+        config->entry = addUndefined(s);
+        log("Entry name inferred: " + s);
+      }
     }
   }
 
   // Handle /delayload
-  for (auto *arg : args.filtered(OPT_delayload)) {
-    config->delayLoads.insert(StringRef(arg->getValue()).lower());
-    if (config->machine == I386) {
-      config->delayLoadHelper = addUndefined("___delayLoadHelper2 at 8");
-    } else {
-      config->delayLoadHelper = addUndefined("__delayLoadHelper2");
+  {
+    llvm::TimeTraceScope timeScope("Delay load");
+    for (auto *arg : args.filtered(OPT_delayload)) {
+      config->delayLoads.insert(StringRef(arg->getValue()).lower());
+      if (config->machine == I386) {
+        config->delayLoadHelper = addUndefined("___delayLoadHelper2 at 8");
+      } else {
+        config->delayLoadHelper = addUndefined("__delayLoadHelper2");
+      }
     }
   }
 
@@ -2303,54 +2334,57 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // This code may add new undefined symbols to the link, which may enqueue more
   // symbol resolution tasks, so we need to continue executing tasks until we
   // converge.
-  do {
-    // Windows specific -- if entry point is not found,
-    // search for its mangled names.
-    if (config->entry)
-      mangleMaybe(config->entry);
-
-    // Windows specific -- Make sure we resolve all dllexported symbols.
-    for (Export &e : config->exports) {
-      if (!e.forwardTo.empty())
-        continue;
-      e.sym = addUndefined(e.name);
-      if (e.source != ExportSource::Directives)
-        e.symbolName = mangleMaybe(e.sym);
-    }
+  {
+    llvm::TimeTraceScope timeScope("Add unresolved symbols");
+    do {
+      // Windows specific -- if entry point is not found,
+      // search for its mangled names.
+      if (config->entry)
+        mangleMaybe(config->entry);
+
+      // Windows specific -- Make sure we resolve all dllexported symbols.
+      for (Export &e : config->exports) {
+        if (!e.forwardTo.empty())
+          continue;
+        e.sym = addUndefined(e.name);
+        if (e.source != ExportSource::Directives)
+          e.symbolName = mangleMaybe(e.sym);
+      }
 
-    // Add weak aliases. Weak aliases is a mechanism to give remaining
-    // undefined symbols final chance to be resolved successfully.
-    for (auto pair : config->alternateNames) {
-      StringRef from = pair.first;
-      StringRef to = pair.second;
-      Symbol *sym = ctx.symtab.find(from);
-      if (!sym)
-        continue;
-      if (auto *u = dyn_cast<Undefined>(sym))
-        if (!u->weakAlias)
-          u->weakAlias = ctx.symtab.addUndefined(to);
-    }
+      // Add weak aliases. Weak aliases is a mechanism to give remaining
+      // undefined symbols final chance to be resolved successfully.
+      for (auto pair : config->alternateNames) {
+        StringRef from = pair.first;
+        StringRef to = pair.second;
+        Symbol *sym = ctx.symtab.find(from);
+        if (!sym)
+          continue;
+        if (auto *u = dyn_cast<Undefined>(sym))
+          if (!u->weakAlias)
+            u->weakAlias = ctx.symtab.addUndefined(to);
+      }
 
-    // If any inputs are bitcode files, the LTO code generator may create
-    // references to library functions that are not explicit in the bitcode
-    // file's symbol table. If any of those library functions are defined in a
-    // bitcode file in an archive member, we need to arrange to use LTO to
-    // compile those archive members by adding them to the link beforehand.
-    if (!ctx.bitcodeFileInstances.empty())
-      for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
-        ctx.symtab.addLibcall(s);
-
-    // Windows specific -- if __load_config_used can be resolved, resolve it.
-    if (ctx.symtab.findUnderscore("_load_config_used"))
-      addUndefined(mangle("_load_config_used"));
-
-    if (args.hasArg(OPT_include_optional)) {
-      // Handle /includeoptional
-      for (auto *arg : args.filtered(OPT_include_optional))
-        if (isa_and_nonnull<LazyArchive>(ctx.symtab.find(arg->getValue())))
-          addUndefined(arg->getValue());
-    }
-  } while (run());
+      // If any inputs are bitcode files, the LTO code generator may create
+      // references to library functions that are not explicit in the bitcode
+      // file's symbol table. If any of those library functions are defined in a
+      // bitcode file in an archive member, we need to arrange to use LTO to
+      // compile those archive members by adding them to the link beforehand.
+      if (!ctx.bitcodeFileInstances.empty())
+        for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
+          ctx.symtab.addLibcall(s);
+
+      // Windows specific -- if __load_config_used can be resolved, resolve it.
+      if (ctx.symtab.findUnderscore("_load_config_used"))
+        addUndefined(mangle("_load_config_used"));
+
+      if (args.hasArg(OPT_include_optional)) {
+        // Handle /includeoptional
+        for (auto *arg : args.filtered(OPT_include_optional))
+          if (isa_and_nonnull<LazyArchive>(ctx.symtab.find(arg->getValue())))
+            addUndefined(arg->getValue());
+      }
+    } while (run());
+  }
 
   // Create wrapped symbols for -wrap option.
   std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args);
@@ -2447,6 +2481,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // need to create a .lib file. In MinGW mode, we only do that when the
   // -implib option is given explicitly, for compatibility with GNU ld.
   if (!config->exports.empty() || config->dll) {
+    llvm::TimeTraceScope timeScope("Create .lib exports");
     fixupExports();
     if (!config->noimplib && (!config->mingw || !config->implib.empty()))
       createImportLibrary(/*asLib=*/false);
@@ -2500,6 +2535,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
 
   // Handle /call-graph-ordering-file and /call-graph-profile-sort (default on).
   if (config->callGraphProfileSort) {
+    llvm::TimeTraceScope timeScope("Call graph");
     if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) {
       parseCallGraphFile(arg->getValue());
     }
@@ -2548,6 +2584,12 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   rootTimer.stop();
   if (config->showTiming)
     ctx.rootTimer.print();
+
+  if (config->timeTraceEnabled) {
+    checkError(timeTraceProfilerWrite(
+        args.getLastArgValue(OPT_time_trace_eq).str(), config->outputFile));
+    timeTraceProfilerCleanup();
+  }
 }
 
 } // namespace lld::coff
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index fa15f816fb5fcd4..583f6af070b6258 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/WindowsManifest/WindowsManifestMerger.h"
 #include <limits>
@@ -661,6 +662,7 @@ static StringRef exportSourceName(ExportSource s) {
 // Performs error checking on all /export arguments.
 // It also sets ordinals.
 void LinkerDriver::fixupExports() {
+  llvm::TimeTraceScope timeScope("Fixup exports");
   // Symbol ordinals must be unique.
   std::set<uint16_t> ords;
   for (Export &e : ctx.config.exports) {
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 37f5e7549b7fc09..0f43da0dbc101af 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
@@ -246,6 +247,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> fn) {
 // Two sections are considered the same if their section headers,
 // contents and relocations are all the same.
 void ICF::run() {
+  llvm::TimeTraceScope timeScope("ICF");
   ScopedTimer t(ctx.icfTimer);
 
   // Collect only mergeable sections and group by hash value.
diff --git a/lld/COFF/LLDMapFile.cpp b/lld/COFF/LLDMapFile.cpp
index c14480aaf821af7..58098cf5d6528d0 100644
--- a/lld/COFF/LLDMapFile.cpp
+++ b/lld/COFF/LLDMapFile.cpp
@@ -25,6 +25,7 @@
 #include "Writer.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -92,6 +93,7 @@ void lld::coff::writeLLDMapFile(const COFFLinkerContext &ctx) {
   if (ctx.config.lldmapFile.empty())
     return;
 
+  llvm::TimeTraceScope timeScope(".lldmap file");
   std::error_code ec;
   raw_fd_ostream os(ctx.config.lldmapFile, ec, sys::fs::OF_None);
   if (ec)
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 09d722849c38270..7df931911213672 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -86,6 +86,8 @@ lto::Config BitcodeCompiler::createConfig() {
   c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile);
   c.RunCSIRInstr = ctx.config.ltoCSProfileGenerate;
   c.PGOWarnMismatch = ctx.config.ltoPGOWarnMismatch;
+  c.TimeTraceEnabled = ctx.config.timeTraceEnabled;
+  c.TimeTraceGranularity = ctx.config.timeTraceGranularity;
 
   if (ctx.config.emit == EmitKind::LLVM) {
     c.PostInternalizeModuleHook = [this](size_t task, const Module &m) {
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index f7a4ef9612907d6..ed521dd375ed010 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -36,6 +36,7 @@
 #include "lld/Common/Timer.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -203,6 +204,7 @@ void lld::coff::writeMapFile(COFFLinkerContext &ctx) {
   if (ctx.config.mapFile.empty())
     return;
 
+  llvm::TimeTraceScope timeScope("Map file");
   std::error_code ec;
   raw_fd_ostream os(ctx.config.mapFile, ec, sys::fs::OF_None);
   if (ec)
diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp
index ad8c340f1845177..2cf216a6aaad564 100644
--- a/lld/COFF/MarkLive.cpp
+++ b/lld/COFF/MarkLive.cpp
@@ -11,6 +11,7 @@
 #include "Symbols.h"
 #include "lld/Common/Timer.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <vector>
 
 namespace lld::coff {
@@ -19,6 +20,7 @@ namespace lld::coff {
 // COMDAT chunks will be ignored by Writer, so they will be excluded
 // from the final output.
 void markLive(COFFLinkerContext &ctx) {
+  llvm::TimeTraceScope timeScope("Mark live");
   ScopedTimer t(ctx.gcTimer);
 
   // We build up a worklist of sections which have been marked as live. We only
diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp
index 53e146bb8600b8f..e46f5277a8c3654 100644
--- a/lld/COFF/MinGW.cpp
+++ b/lld/COFF/MinGW.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -172,6 +173,7 @@ bool AutoExporter::shouldExport(Defined *sym) const {
 
 void lld::coff::writeDefFile(StringRef name,
                              const std::vector<Export> &exports) {
+  llvm::TimeTraceScope timeScope("Write .def file");
   std::error_code ec;
   raw_fd_ostream os(name, ec, sys::fs::OF_None);
   if (ec)
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index 22add8ff72b95d3..e7bb952808863be 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -290,6 +290,14 @@ def wrap : P_priv<"wrap">;
 
 def vfsoverlay : P<"vfsoverlay", "Path to a vfsoverlay yaml file to optionally look for /defaultlib's in">;
 
+def time_trace_eq: Joined<["--"], "time-trace=">, MetaVarName<"<file>">,
+  HelpText<"Record time trace to <file>">;
+def : Flag<["--"], "time-trace">, Alias<time_trace_eq>,
+  HelpText<"Record time trace to file next to output">;
+
+def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">,
+    HelpText<"Minimum time granularity (in microseconds) traced by time profiler">;
+
 // Flags for debugging
 def lldmap : F<"lldmap">;
 def lldmap_file : P_priv<"lldmap">;
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 167ff54f06c4fe5..effa9dac35bc1e1 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -57,6 +57,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <memory>
 #include <optional>
 
@@ -1027,6 +1028,7 @@ void PDBLinker::addDebugSymbols(TpiSource *source) {
   if (!source->file)
     return;
 
+  llvm::TimeTraceScope timeScope("Merge symbols");
   ScopedTimer t(ctx.symbolMergingTimer);
   ExitOnError exitOnErr;
   pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
@@ -1101,6 +1103,7 @@ void PDBLinker::addDebug(TpiSource *source) {
   // indices to PDB type and item indices.  If we are using ghashes, types have
   // already been merged.
   if (!ctx.config.debugGHashes) {
+    llvm::TimeTraceScope timeScope("Merge types (Non-GHASH)");
     ScopedTimer t(ctx.typeMergingTimer);
     if (Error e = source->mergeDebugT(&tMerger)) {
       // If type merging failed, ignore the symbols.
@@ -1145,39 +1148,49 @@ static pdb::BulkPublic createPublic(COFFLinkerContext &ctx, Defined *def) {
 // Add all object files to the PDB. Merge .debug$T sections into IpiData and
 // TpiData.
 void PDBLinker::addObjectsToPDB() {
-  ScopedTimer t1(ctx.addObjectsTimer);
-
-  // Create module descriptors
-  for (ObjFile *obj : ctx.objFileInstances)
-    createModuleDBI(obj);
-
-  // Reorder dependency type sources to come first.
-  tMerger.sortDependencies();
-
-  // Merge type information from input files using global type hashing.
-  if (ctx.config.debugGHashes)
-    tMerger.mergeTypesWithGHash();
-
-  // Merge dependencies and then regular objects.
-  for (TpiSource *source : tMerger.dependencySources)
-    addDebug(source);
-  for (TpiSource *source : tMerger.objectSources)
-    addDebug(source);
+  {
+    llvm::TimeTraceScope timeScope("Add objects to PDB");
+    ScopedTimer t1(ctx.addObjectsTimer);
+
+    // Create module descriptors
+    for (ObjFile *obj : ctx.objFileInstances)
+      createModuleDBI(obj);
+
+    // Reorder dependency type sources to come first.
+    tMerger.sortDependencies();
+
+    // Merge type information from input files using global type hashing.
+    if (ctx.config.debugGHashes)
+      tMerger.mergeTypesWithGHash();
+
+    // Merge dependencies and then regular objects.
+    {
+      llvm::TimeTraceScope timeScope("Merge debug info (dependencies)");
+      for (TpiSource *source : tMerger.dependencySources)
+        addDebug(source);
+    }
+    {
+      llvm::TimeTraceScope timeScope("Merge debug info (objects)");
+      for (TpiSource *source : tMerger.objectSources)
+        addDebug(source);
+    }
 
-  builder.getStringTableBuilder().setStrings(pdbStrTab);
-  t1.stop();
+    builder.getStringTableBuilder().setStrings(pdbStrTab);
+  }
 
   // Construct TPI and IPI stream contents.
-  ScopedTimer t2(ctx.tpiStreamLayoutTimer);
+  {
+    llvm::TimeTraceScope timeScope("TPI/IPI stream layout");
+    ScopedTimer t2(ctx.tpiStreamLayoutTimer);
 
-  // Collect all the merged types.
-  if (ctx.config.debugGHashes) {
-    addGHashTypeInfo(ctx, builder);
-  } else {
-    addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
-    addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+    // Collect all the merged types.
+    if (ctx.config.debugGHashes) {
+      addGHashTypeInfo(ctx, builder);
+    } else {
+      addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
+      addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+    }
   }
-  t2.stop();
 
   if (ctx.config.showSummary) {
     for (TpiSource *source : ctx.tpiSourceList) {
@@ -1188,6 +1201,7 @@ void PDBLinker::addObjectsToPDB() {
 }
 
 void PDBLinker::addPublicsToPDB() {
+  llvm::TimeTraceScope timeScope("Publics layout");
   ScopedTimer t3(ctx.publicsLayoutTimer);
   // Compute the public symbols.
   auto &gsiBuilder = builder.getGsiBuilder();
@@ -1306,6 +1320,7 @@ void PDBLinker::printStats() {
 }
 
 void PDBLinker::addNatvisFiles() {
+  llvm::TimeTraceScope timeScope("Natvis files");
   for (StringRef file : ctx.config.natvisFiles) {
     ErrorOr<std::unique_ptr<MemoryBuffer>> dataOrErr =
         MemoryBuffer::getFile(file);
@@ -1325,6 +1340,7 @@ void PDBLinker::addNatvisFiles() {
 }
 
 void PDBLinker::addNamedStreams() {
+  llvm::TimeTraceScope timeScope("Named streams");
   ExitOnError exitOnErr;
   for (const auto &streamFile : ctx.config.namedStreams) {
     const StringRef stream = streamFile.getKey(), file = streamFile.getValue();
@@ -1499,6 +1515,7 @@ void PDBLinker::addImportFilesToPDB() {
   if (ctx.importFileInstances.empty())
     return;
 
+  llvm::TimeTraceScope timeScope("Import files");
   ExitOnError exitOnErr;
   std::map<std::string, llvm::pdb::DbiModuleDescriptorBuilder *> dllToModuleDbi;
 
@@ -1588,6 +1605,7 @@ void PDBLinker::addImportFilesToPDB() {
 void lld::coff::createPDB(COFFLinkerContext &ctx,
                           ArrayRef<uint8_t> sectionTable,
                           llvm::codeview::DebugInfo *buildId) {
+  llvm::TimeTraceScope timeScope("PDB file");
   ScopedTimer t1(ctx.totalPdbLinkTimer);
   PDBLinker pdb(ctx);
 
@@ -1599,14 +1617,20 @@ void lld::coff::createPDB(COFFLinkerContext &ctx,
   pdb.addNamedStreams();
   pdb.addPublicsToPDB();
 
-  ScopedTimer t2(ctx.diskCommitTimer);
-  codeview::GUID guid;
-  pdb.commit(&guid);
-  memcpy(&buildId->PDB70.Signature, &guid, 16);
+  {
+    llvm::TimeTraceScope timeScope("Commit PDB file to disk");
+    ScopedTimer t2(ctx.diskCommitTimer);
+    codeview::GUID guid;
+    pdb.commit(&guid);
+    memcpy(&buildId->PDB70.Signature, &guid, 16);
+  }
 
-  t2.stop();
   t1.stop();
   pdb.printStats();
+
+  // Manually start this profile point to measure ~PDBLinker().
+  if (getTimeTraceProfilerInstance() != nullptr)
+    timeTraceProfilerBegin("PDBLinker destructor", StringRef(""));
 }
 
 void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) {
@@ -1641,6 +1665,7 @@ void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) {
 }
 
 void PDBLinker::addSections(ArrayRef<uint8_t> sectionTable) {
+  llvm::TimeTraceScope timeScope("PDB output sections");
   ExitOnError exitOnErr;
   // It's not entirely clear what this is, but the * Linker * module uses it.
   pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index b7217d14391a2c4..d9673e159769fbd 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -473,6 +473,7 @@ void SymbolTable::reportUnresolvable() {
 }
 
 void SymbolTable::resolveRemainingUndefines() {
+  llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
   SmallPtrSet<Symbol *, 8> undefs;
   DenseMap<Symbol *, Symbol *> localImports;
 
@@ -878,6 +879,7 @@ void SymbolTable::compileBitcodeFiles() {
   if (ctx.bitcodeFileInstances.empty())
     return;
 
+  llvm::TimeTraceScope timeScope("Compile bitcode");
   ScopedTimer t(ctx.ltoTimer);
   lto.reset(new BitcodeCompiler(ctx));
   for (BitcodeFile *f : ctx.bitcodeFileInstances)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index c9b6318fe5212b0..56494810cea9581 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
 #include <cstdio>
@@ -320,7 +321,10 @@ class Writer {
 };
 } // anonymous namespace
 
-void lld::coff::writeResult(COFFLinkerContext &ctx) { Writer(ctx).run(); }
+void lld::coff::writeResult(COFFLinkerContext &ctx) {
+  llvm::TimeTraceScope timeScope("Write output(s)");
+  Writer(ctx).run();
+}
 
 void OutputSection::addChunk(Chunk *c) {
   chunks.push_back(c);
@@ -563,16 +567,21 @@ void Writer::finalizeAddresses() {
   int pass = 0;
   int margin = 1024 * 100;
   while (true) {
+    llvm::TimeTraceScope timeScope2("Add thunks pass");
+
     // First check whether we need thunks at all, or if the previous pass of
     // adding them turned out ok.
     bool rangesOk = true;
     size_t numChunks = 0;
-    for (OutputSection *sec : ctx.outputSections) {
-      if (!verifyRanges(sec->chunks)) {
-        rangesOk = false;
-        break;
+    {
+      llvm::TimeTraceScope timeScope3("Verify ranges");
+      for (OutputSection *sec : ctx.outputSections) {
+        if (!verifyRanges(sec->chunks)) {
+          rangesOk = false;
+          break;
+        }
+        numChunks += sec->chunks.size();
       }
-      numChunks += sec->chunks.size();
     }
     if (rangesOk) {
       if (pass > 0)
@@ -596,8 +605,11 @@ void Writer::finalizeAddresses() {
     // Try adding thunks everywhere where it is needed, with a margin
     // to avoid things going out of range due to the added thunks.
     bool addressesChanged = false;
-    for (OutputSection *sec : ctx.outputSections)
-      addressesChanged |= createThunks(sec, margin);
+    {
+      llvm::TimeTraceScope timeScope3("Create thunks");
+      for (OutputSection *sec : ctx.outputSections)
+        addressesChanged |= createThunks(sec, margin);
+    }
     // If the verification above thought we needed thunks, we should have
     // added some.
     assert(addressesChanged);
@@ -616,6 +628,8 @@ void Writer::writePEChecksum() {
     return;
   }
 
+  llvm::TimeTraceScope timeScope("PE checksum");
+
   // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#checksum
   uint32_t *buf = (uint32_t *)buffer->getBufferStart();
   uint32_t size = (uint32_t)(buffer->getBufferSize());
@@ -650,46 +664,52 @@ void Writer::writePEChecksum() {
 
 // The main function of the writer.
 void Writer::run() {
-  ScopedTimer t1(ctx.codeLayoutTimer);
-
-  createImportTables();
-  createSections();
-  appendImportThunks();
-  // Import thunks must be added before the Control Flow Guard tables are added.
-  createMiscChunks();
-  createExportTable();
-  mergeSections();
-  removeUnusedSections();
-  finalizeAddresses();
-  removeEmptySections();
-  assignOutputSectionIndices();
-  setSectionPermissions();
-  createSymbolAndStringTable();
-
-  if (fileSize > UINT32_MAX)
-    fatal("image size (" + Twine(fileSize) + ") " +
-        "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");
-
-  openFile(ctx.config.outputFile);
-  if (ctx.config.is64()) {
-    writeHeader<pe32plus_header>();
-  } else {
-    writeHeader<pe32_header>();
-  }
-  writeSections();
-  checkLoadConfig();
-  sortExceptionTable();
-
-  // Fix up the alignment in the TLS Directory's characteristic field,
-  // if a specific alignment value is needed
-  if (tlsAlignment)
-    fixTlsAlignment();
+  {
+    llvm::TimeTraceScope timeScope("Write PE");
+    ScopedTimer t1(ctx.codeLayoutTimer);
+
+    createImportTables();
+    createSections();
+    appendImportThunks();
+    // Import thunks must be added before the Control Flow Guard tables are
+    // added.
+    createMiscChunks();
+    createExportTable();
+    mergeSections();
+    removeUnusedSections();
+    finalizeAddresses();
+    removeEmptySections();
+    assignOutputSectionIndices();
+    setSectionPermissions();
+    createSymbolAndStringTable();
+
+    if (fileSize > UINT32_MAX)
+      fatal("image size (" + Twine(fileSize) + ") " +
+            "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");
+
+    openFile(ctx.config.outputFile);
+    if (ctx.config.is64()) {
+      writeHeader<pe32plus_header>();
+    } else {
+      writeHeader<pe32_header>();
+    }
+    writeSections();
+    checkLoadConfig();
+    sortExceptionTable();
 
-  t1.stop();
+    // Fix up the alignment in the TLS Directory's characteristic field,
+    // if a specific alignment value is needed
+    if (tlsAlignment)
+      fixTlsAlignment();
+  }
 
   if (!ctx.config.pdbPath.empty() && ctx.config.debug) {
     assert(buildId);
     createPDB(ctx, sectionTable, buildId->buildId);
+
+    // Manually end this profile point to measure ~PDBLinker().
+    if (getTimeTraceProfilerInstance() != nullptr)
+      timeTraceProfilerEnd();
   }
   writeBuildId();
 
@@ -701,6 +721,7 @@ void Writer::run() {
   if (errorCount())
     return;
 
+  llvm::TimeTraceScope timeScope("Commit PE to disk");
   ScopedTimer t2(ctx.outputCommitTimer);
   if (auto e = buffer->commit())
     fatal("failed to write output '" + buffer->getPath() +
@@ -878,6 +899,7 @@ void Writer::sortSections() {
 
 // Create output section objects and add them to OutputSections.
 void Writer::createSections() {
+  llvm::TimeTraceScope timeScope("Output sections");
   // First, create the builtin sections.
   const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA;
   const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA;
@@ -1003,6 +1025,7 @@ void Writer::createSections() {
 }
 
 void Writer::createMiscChunks() {
+  llvm::TimeTraceScope timeScope("Misc chunks");
   Configuration *config = &ctx.config;
 
   for (MergeChunk *p : ctx.mergeChunkInstances) {
@@ -1068,6 +1091,7 @@ void Writer::createMiscChunks() {
 // IdataContents class abstracted away the details for us,
 // so we just let it create chunks and add them to the section.
 void Writer::createImportTables() {
+  llvm::TimeTraceScope timeScope("Import tables");
   // Initialize DLLOrder so that import entries are ordered in
   // the same order as in the command line. (That affects DLL
   // initialization order, and this ordering is MSVC-compatible.)
@@ -1097,6 +1121,7 @@ void Writer::appendImportThunks() {
   if (ctx.importFileInstances.empty())
     return;
 
+  llvm::TimeTraceScope timeScope("Import thunks");
   for (ImportFile *file : ctx.importFileInstances) {
     if (!file->live)
       continue;
@@ -1128,6 +1153,7 @@ void Writer::appendImportThunks() {
 }
 
 void Writer::createExportTable() {
+  llvm::TimeTraceScope timeScope("Export table");
   if (!edataSec->chunks.empty()) {
     // Allow using a custom built export table from input object files, instead
     // of having the linker synthesize the tables.
@@ -1148,6 +1174,7 @@ void Writer::createExportTable() {
 }
 
 void Writer::removeUnusedSections() {
+  llvm::TimeTraceScope timeScope("Remove unused sections");
   // Remove sections that we can be sure won't get content, to avoid
   // allocating space for their section headers.
   auto isUnused = [this](OutputSection *s) {
@@ -1163,11 +1190,13 @@ void Writer::removeUnusedSections() {
 // The Windows loader doesn't seem to like empty sections,
 // so we remove them if any.
 void Writer::removeEmptySections() {
+  llvm::TimeTraceScope timeScope("Remove empty sections");
   auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; };
   llvm::erase_if(ctx.outputSections, isEmpty);
 }
 
 void Writer::assignOutputSectionIndices() {
+  llvm::TimeTraceScope timeScope("Output sections indices");
   // Assign final output section indices, and assign each chunk to its output
   // section.
   uint32_t idx = 1;
@@ -1258,6 +1287,7 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
 }
 
 void Writer::createSymbolAndStringTable() {
+  llvm::TimeTraceScope timeScope("Symbol and string table");
   // PE/COFF images are limited to 8 byte section names. Longer names can be
   // supported by writing a non-standard string table, but this string table is
   // not mapped at runtime and the long names will therefore be inaccessible.
@@ -1320,6 +1350,7 @@ void Writer::createSymbolAndStringTable() {
 }
 
 void Writer::mergeSections() {
+  llvm::TimeTraceScope timeScope("Merge sections");
   if (!pdataSec->chunks.empty()) {
     firstPdata = pdataSec->chunks.front();
     lastPdata = pdataSec->chunks.back();
@@ -1353,6 +1384,7 @@ void Writer::mergeSections() {
 // Visits all sections to assign incremental, non-overlapping RVAs and
 // file offsets.
 void Writer::assignAddresses() {
+  llvm::TimeTraceScope timeScope("Assign addresses");
   Configuration *config = &ctx.config;
 
   sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
@@ -1367,6 +1399,7 @@ void Writer::assignAddresses() {
   uint64_t rva = alignTo(sizeOfHeaders, config->align);
 
   for (OutputSection *sec : ctx.outputSections) {
+    llvm::TimeTraceScope timeScope("Section: ", sec->name);
     if (sec == relocSec)
       addBaserels();
     uint64_t rawSize = 0, virtualSize = 0;
@@ -1947,6 +1980,7 @@ void Writer::insertCtorDtorSymbols() {
 // Handles /section options to allow users to overwrite
 // section attributes.
 void Writer::setSectionPermissions() {
+  llvm::TimeTraceScope timeScope("Sections permissions");
   for (auto &p : ctx.config.section) {
     StringRef name = p.first;
     uint32_t perm = p.second;
@@ -1958,6 +1992,7 @@ void Writer::setSectionPermissions() {
 
 // Write section contents to a mmap'ed file.
 void Writer::writeSections() {
+  llvm::TimeTraceScope timeScope("Write sections");
   uint8_t *buf = buffer->getBufferStart();
   for (OutputSection *sec : ctx.outputSections) {
     uint8_t *secBuf = buf + sec->getFileOff();
@@ -1974,6 +2009,8 @@ void Writer::writeSections() {
 }
 
 void Writer::writeBuildId() {
+  llvm::TimeTraceScope timeScope("Write build ID");
+
   // There are two important parts to the build ID.
   // 1) If building with debug info, the COFF debug directory contains a
   //    timestamp as well as a Guid and Age of the PDB.
@@ -2030,6 +2067,7 @@ void Writer::writeBuildId() {
 void Writer::sortExceptionTable() {
   if (!firstPdata)
     return;
+  llvm::TimeTraceScope timeScope("Sort exception table");
   // We assume .pdata contains function table entries only.
   auto bufAddr = [&](Chunk *c) {
     OutputSection *os = ctx.getOutputSection(c);
@@ -2123,6 +2161,7 @@ void Writer::addBaserels() {
   for (OutputSection *sec : ctx.outputSections) {
     if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
       continue;
+    llvm::TimeTraceScope timeScope("Base relocations: ", sec->name);
     // Collect all locations for base relocations.
     for (Chunk *c : sec->chunks)
       c->getBaserels(&v);
diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp
index 1fc19ef02788603..c2d3644191c9f42 100644
--- a/lld/Common/Filesystem.cpp
+++ b/lld/Common/Filesystem.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
 #if LLVM_ON_UNIX
 #include <unistd.h>
 #endif
@@ -122,6 +123,7 @@ void lld::unlinkAsync(StringRef path) {
 // is called. We use that class without calling commit() to predict
 // if the given file is writable.
 std::error_code lld::tryCreateFile(StringRef path) {
+  llvm::TimeTraceScope timeScope("Try create output file");
   if (path.empty())
     return std::error_code();
   if (path == "-")
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 9915f07cc5a7ebc..c322b776ff58f65 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -38,6 +38,9 @@ Breaking changes
 COFF Improvements
 -----------------
 
+* Added support for ``--time-trace`` and associated ``--time-trace-granularity``.
+  This generates a .json profile trace of the linker execution.
+
 MinGW Improvements
 ------------------
 
diff --git a/lld/test/COFF/time-trace.s b/lld/test/COFF/time-trace.s
new file mode 100644
index 000000000000000..436da2483a54f61
--- /dev/null
+++ b/lld/test/COFF/time-trace.s
@@ -0,0 +1,44 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -triple=x86_64-windows-msvc -filetype=obj -o %t.obj %s
+
+# Test implicit trace file name
+# RUN: lld-link %t.obj /entry:main /out:%t1.exe --time-trace --time-trace-granularity=0
+# RUN: cat %t1.exe.time-trace \
+# RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
+# RUN:   | FileCheck %s
+
+# Test specified trace file name
+# RUN: lld-link %t.obj /entry:main /out:%t2.exe --time-trace=%t2.json --time-trace-granularity=0
+# RUN: cat %t2.json \
+# RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
+# RUN:   | FileCheck %s
+
+# Test trace requested to stdout
+# RUN: env LLD_IN_TEST=1 lld-link %t.obj /entry:main /out:%t3.exe --time-trace=- --time-trace-granularity=0 \
+# RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
+# RUN:   | FileCheck %s
+
+# CHECK:      "beginningOfTime": {{[0-9]{16},}}
+# CHECK-NEXT: "traceEvents": [
+
+# Check one event has correct fields
+# CHECK:      "dur":
+# CHECK-NEXT: "name":
+# CHECK-NEXT: "ph":
+# CHECK-NEXT: "pid":
+# CHECK-NEXT: "tid":
+# CHECK-NEXT: "ts":
+
+# Check there are events
+# CHECK: "name": "Read input files"
+
+# Check process_name entry field
+# CHECK: "name": "lld-link{{(.exe)?}}"
+# CHECK: "name": "process_name"
+# CHECK: "name": "thread_name"
+
+.text
+.global main
+main:
+	ret
diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index c26caa647ed9f69..e909e851a9051d3 100644
--- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -248,6 +249,8 @@ uint32_t MSFBuilder::computeDirectoryByteSize() const {
 }
 
 Expected<MSFLayout> MSFBuilder::generateLayout() {
+  llvm::TimeTraceScope timeScope("MSF: Generate layout");
+
   SuperBlock *SB = Allocator.Allocate<SuperBlock>();
   MSFLayout L;
   L.SB = SB;
@@ -336,6 +339,8 @@ static void commitFpm(WritableBinaryStream &MsfBuffer, const MSFLayout &Layout,
 
 Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path,
                                                   MSFLayout &Layout) {
+  llvm::TimeTraceScope timeScope("Commit MSF");
+
   Expected<MSFLayout> L = generateLayout();
   if (!L)
     return L.takeError();
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 37c1b0407268cb5..fb8b11b9c1588db 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -381,6 +382,7 @@ void DbiStreamBuilder::createSectionMap(
 
 Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
                                WritableBinaryStreamRef MsfBuffer) {
+  llvm::TimeTraceScope timeScope("Commit DBI stream");
   if (auto EC = finalize())
     return EC;
 
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index b17fbd63e9fd4df..9f9bd47aa63387c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/BinaryItemStream.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
 #include <vector>
@@ -478,6 +479,7 @@ Error GSIStreamBuilder::commitGlobalsHashStream(
 
 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
                                WritableBinaryStreamRef Buffer) {
+  llvm::TimeTraceScope timeScope("Commit GSI stream");
   auto GS = WritableMappedBlockStream::createIndexedStream(
       Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
   auto PS = WritableMappedBlockStream::createIndexedStream(
diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index e8f5a451b08ea73..95107125701de28 100644
--- a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -14,6 +14,7 @@
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/TimeProfiler.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -55,6 +56,7 @@ Error InfoStreamBuilder::finalizeMsfLayout() {
 
 Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
                                 WritableBinaryStreamRef Buffer) const {
+  llvm::TimeTraceScope timeScope("Commit info stream");
   auto InfoS = WritableMappedBlockStream::createIndexedStream(
       Layout, Buffer, StreamPDB, Msf.getAllocator());
   BinaryStreamWriter Writer(*InfoS);
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index cd30b56be7cd85b..06e379c3f6d237c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 
 #include <ctime>
@@ -129,6 +130,7 @@ void PDBFileBuilder::addInjectedSource(StringRef Name,
 }
 
 Error PDBFileBuilder::finalizeMsfLayout() {
+  llvm::TimeTraceScope timeScope("MSF layout");
 
   if (Ipi && Ipi->getRecordCount() > 0) {
     // In theory newer PDBs always have an ID stream, but by saying that we're
@@ -254,6 +256,7 @@ void PDBFileBuilder::commitInjectedSources(WritableBinaryStream &MsfBuffer,
   if (InjectedSourceTable.empty())
     return;
 
+  llvm::TimeTraceScope timeScope("Commit injected sources");
   commitSrcHeaderBlock(MsfBuffer, Layout);
 
   for (const auto &IS : InjectedSources) {
@@ -290,15 +293,18 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
   if (auto EC = Strings.commit(NSWriter))
     return EC;
 
-  for (const auto &NSE : NamedStreamData) {
-    if (NSE.second.empty())
-      continue;
-
-    auto NS = WritableMappedBlockStream::createIndexedStream(
-        Layout, Buffer, NSE.first, Allocator);
-    BinaryStreamWriter NSW(*NS);
-    if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
-      return EC;
+  {
+    llvm::TimeTraceScope timeScope("Named stream data");
+    for (const auto &NSE : NamedStreamData) {
+      if (NSE.second.empty())
+        continue;
+
+      auto NS = WritableMappedBlockStream::createIndexedStream(
+          Layout, Buffer, NSE.first, Allocator);
+      BinaryStreamWriter NSW(*NS);
+      if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
+        return EC;
+    }
   }
 
   if (Info) {
@@ -338,6 +344,8 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
   // Set the build id at the very end, after every other byte of the PDB
   // has been written.
   if (Info->hashPDBContentsToGUID()) {
+    llvm::TimeTraceScope timeScope("Compute build ID");
+
     // Compute a hash of all sections of the output file.
     uint64_t Digest =
         xxh3_64bits({Buffer.getBufferStart(), Buffer.getBufferEnd()});
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index c0245dc17cf1ef8..91b3dd5c32b9f67 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -13,6 +13,7 @@
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/TimeProfiler.h"
 
 #include <map>
 
@@ -207,6 +208,7 @@ Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
 }
 
 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+  llvm::TimeTraceScope timeScope("Commit strings table");
   BinaryStreamWriter SectionWriter;
 
   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index aad5847651a0db2..30d5a563e8bdae9 100644
--- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <algorithm>
 #include <cstdint>
 #include <numeric>
@@ -171,6 +172,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
 
 Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
                                WritableBinaryStreamRef Buffer) {
+  llvm::TimeTraceScope timeScope("Commit TPI stream");
   if (auto EC = finalize())
     return EC;
 
diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp
index c11ee59da0dda33..58a06a34e8cf39d 100644
--- a/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/llvm/lib/Support/FileOutputBuffer.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Memory.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <system_error>
 
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -43,6 +44,8 @@ class OnDiskBuffer : public FileOutputBuffer {
   size_t getBufferSize() const override { return Buffer.size(); }
 
   Error commit() override {
+    llvm::TimeTraceScope timeScope("Commit buffer to disk");
+
     // Unmap buffer, letting OS flush dirty pages to file on disk.
     Buffer.unmap();
 

>From 5c94e690a7d8d6d3646387da3a5bd217a22ecf71 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Wed, 4 Oct 2023 14:24:00 -0400
Subject: [PATCH 2/3] Fix topmost profile scope. This also fixes the Asserts
 compilation.

---
 lld/COFF/Driver.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index c6a7be14017fe7f..a44f4ae21e90979 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2586,6 +2586,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     ctx.rootTimer.print();
 
   if (config->timeTraceEnabled) {
+    // Manually stop the topmost "COFF link" scope, since we're shutting down.
+    timeTraceProfilerEnd();
+
     checkError(timeTraceProfilerWrite(
         args.getLastArgValue(OPT_time_trace_eq).str(), config->outputFile));
     timeTraceProfilerCleanup();

>From 7f033f4047e780ee41a2ff6be24bbbefef5c6b43 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Thu, 5 Oct 2023 10:00:32 -0400
Subject: [PATCH 3/3] Add some more profile scopes in the Driver.

---
 lld/COFF/Driver.cpp | 116 +++++++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 49 deletions(-)

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index a44f4ae21e90979..c7f4e1f767a5431 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1505,8 +1505,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     v.push_back(arg->getValue());
     config->mllvmOpts.emplace_back(arg->getValue());
   }
-  cl::ResetAllOptionOccurrences();
-  cl::ParseCommandLineOptions(v.size(), v.data());
+  {
+    llvm::TimeTraceScope timeScope2("Parse cl::opt");
+    cl::ResetAllOptionOccurrences();
+    cl::ParseCommandLineOptions(v.size(), v.data());
+  }
 
   // Handle /errorlimit early, because error() depends on it.
   if (auto *arg = args.getLastArg(OPT_errorlimit)) {
@@ -1559,15 +1562,18 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
                                   " (use --error-limit=0 to see all errors)";
 
   // Handle /linkrepro and /reproduce.
-  if (std::optional<std::string> path = getReproduceFile(args)) {
-    Expected<std::unique_ptr<TarWriter>> errOrWriter =
-        TarWriter::create(*path, sys::path::stem(*path));
+  {
+    llvm::TimeTraceScope timeScope2("Reproducer");
+    if (std::optional<std::string> path = getReproduceFile(args)) {
+      Expected<std::unique_ptr<TarWriter>> errOrWriter =
+          TarWriter::create(*path, sys::path::stem(*path));
 
-    if (errOrWriter) {
-      tar = std::move(*errOrWriter);
-    } else {
-      error("/linkrepro: failed to open " + *path + ": " +
-            toString(errOrWriter.takeError()));
+      if (errOrWriter) {
+        tar = std::move(*errOrWriter);
+      } else {
+        error("/linkrepro: failed to open " + *path + ": " +
+              toString(errOrWriter.takeError()));
+      }
     }
   }
 
@@ -1579,14 +1585,17 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   }
 
   // Construct search path list.
-  searchPaths.emplace_back("");
-  // Prefer the Clang provided builtins over the ones bundled with MSVC.
-  addClangLibSearchPaths(argsArr[0]);
-  for (auto *arg : args.filtered(OPT_libpath))
-    searchPaths.push_back(arg->getValue());
-  detectWinSysRoot(args);
-  if (!args.hasArg(OPT_lldignoreenv) && !args.hasArg(OPT_winsysroot))
-    addLibSearchPaths();
+  {
+    llvm::TimeTraceScope timeScope2("Search paths");
+    searchPaths.emplace_back("");
+    // Prefer the Clang provided builtins over the ones bundled with MSVC.
+    addClangLibSearchPaths(argsArr[0]);
+    for (auto *arg : args.filtered(OPT_libpath))
+      searchPaths.push_back(arg->getValue());
+    detectWinSysRoot(args);
+    if (!args.hasArg(OPT_lldignoreenv) && !args.hasArg(OPT_winsysroot))
+      addLibSearchPaths();
+  }
 
   // Handle /ignore
   for (auto *arg : args.filtered(OPT_ignore)) {
@@ -1721,16 +1730,22 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
       args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false);
 
   // Handle /machine
-  if (auto *arg = args.getLastArg(OPT_machine)) {
-    config->machine = getMachineType(arg->getValue());
-    if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN)
-      fatal(Twine("unknown /machine argument: ") + arg->getValue());
-    addWinSysRootLibSearchPaths();
+  {
+    llvm::TimeTraceScope timeScope2("Machine arg");
+    if (auto *arg = args.getLastArg(OPT_machine)) {
+      config->machine = getMachineType(arg->getValue());
+      if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN)
+        fatal(Twine("unknown /machine argument: ") + arg->getValue());
+      addWinSysRootLibSearchPaths();
+    }
   }
 
   // Handle /nodefaultlib:<filename>
-  for (auto *arg : args.filtered(OPT_nodefaultlib))
-    config->noDefaultLibs.insert(findLib(arg->getValue()).lower());
+  {
+    llvm::TimeTraceScope timeScope2("Nodefaultlib");
+    for (auto *arg : args.filtered(OPT_nodefaultlib))
+      config->noDefaultLibs.insert(findLib(arg->getValue()).lower());
+  }
 
   // Handle /nodefaultlib
   if (args.hasArg(OPT_nodefaultlib_all))
@@ -2068,30 +2083,33 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // Create a list of input files. These can be given as OPT_INPUT options
   // and OPT_wholearchive_file options, and we also need to track OPT_start_lib
   // and OPT_end_lib.
-  bool inLib = false;
-  for (auto *arg : args) {
-    switch (arg->getOption().getID()) {
-    case OPT_end_lib:
-      if (!inLib)
-        error("stray " + arg->getSpelling());
-      inLib = false;
-      break;
-    case OPT_start_lib:
-      if (inLib)
-        error("nested " + arg->getSpelling());
-      inLib = true;
-      break;
-    case OPT_wholearchive_file:
-      if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
-        enqueuePath(*path, true, inLib);
-      break;
-    case OPT_INPUT:
-      if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
-        enqueuePath(*path, isWholeArchive(*path), inLib);
-      break;
-    default:
-      // Ignore other options.
-      break;
+  {
+    llvm::TimeTraceScope timeScope2("Parse & queue inputs");
+    bool inLib = false;
+    for (auto *arg : args) {
+      switch (arg->getOption().getID()) {
+      case OPT_end_lib:
+        if (!inLib)
+          error("stray " + arg->getSpelling());
+        inLib = false;
+        break;
+      case OPT_start_lib:
+        if (inLib)
+          error("nested " + arg->getSpelling());
+        inLib = true;
+        break;
+      case OPT_wholearchive_file:
+        if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
+          enqueuePath(*path, true, inLib);
+        break;
+      case OPT_INPUT:
+        if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
+          enqueuePath(*path, isWholeArchive(*path), inLib);
+        break;
+      default:
+        // Ignore other options.
+        break;
+      }
     }
   }
 



More information about the llvm-commits mailing list