[flang-commits] [clang] [flang] [flang][Driver] Preliminary support for -ftime-report (PR #107270)

Tarun Prabhu via flang-commits flang-commits at lists.llvm.org
Thu Sep 5 08:05:23 PDT 2024


https://github.com/tarunprabhu updated https://github.com/llvm/llvm-project/pull/107270

>From 2f4a8503c605f7401bf9312b59a72b6b3ccdbb7f Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 4 Sep 2024 10:24:31 -0600
Subject: [PATCH 1/3] [flang][Driver] Preliminary support for -ftime-report

The behavior is not entirely consistent with that of clang for the moment
since detailed timing information on the LLVM IR optimization and code
generation passes is not provided. The -ftime-report= option is also not enabled
since that is only relevant for information about the LLVM IR passes. However,
some code to handle that option has been included, to make it easier to support
the option when the issues blocking it are resolved. A FortranSupport library
has been created that is intended to mirror the LLVM and MLIR support libraries.
---
 clang/include/clang/Driver/Options.td         |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp         |  4 +-
 .../include/flang/Frontend/CompilerInstance.h | 59 +++++++++++++++
 .../flang/Frontend/CompilerInvocation.h       | 15 ++++
 flang/include/flang/Support/StringOstream.h   | 32 ++++++++
 flang/include/flang/Support/Timing.h          | 27 +++++++
 flang/lib/CMakeLists.txt                      |  1 +
 flang/lib/Frontend/CMakeLists.txt             |  1 +
 flang/lib/Frontend/CompilerInstance.cpp       | 51 ++++++++++++-
 flang/lib/Frontend/CompilerInvocation.cpp     | 18 +++++
 flang/lib/Frontend/FrontendActions.cpp        | 73 +++++++++++++++++--
 flang/lib/Support/CMakeLists.txt              |  9 +++
 flang/lib/Support/Timing.cpp                  | 67 +++++++++++++++++
 flang/test/Driver/time-report-eq.f90          | 18 +++++
 flang/test/Driver/time-report.f90             | 22 ++++++
 15 files changed, 390 insertions(+), 9 deletions(-)
 create mode 100644 flang/include/flang/Support/StringOstream.h
 create mode 100644 flang/include/flang/Support/Timing.h
 create mode 100644 flang/lib/Support/CMakeLists.txt
 create mode 100644 flang/lib/Support/Timing.cpp
 create mode 100644 flang/test/Driver/time-report-eq.f90
 create mode 100644 flang/test/Driver/time-report.f90

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1b9b3f2c6600a3..6a0a2b40cd192e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4012,7 +4012,7 @@ defm threadsafe_statics : BoolFOption<"threadsafe-statics",
           "Do not emit code to make initialization of local statics thread safe">,
   PosFlag<SetTrue>>;
 def ftime_report : Flag<["-"], "ftime-report">, Group<f_Group>,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   MarshallingInfoFlag<CodeGenOpts<"TimePasses">>;
 def ftime_report_EQ: Joined<["-"], "ftime-report=">, Group<f_Group>,
   Visibility<[ClangOption, CC1Option]>, Values<"per-pass,per-pass-run">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 6ce79d27e98c48..c1e724d7761dc0 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -150,7 +150,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
                             options::OPT_flang_deprecated_no_hlfir,
                             options::OPT_flang_experimental_integer_overflow,
                             options::OPT_fno_ppc_native_vec_elem_order,
-                            options::OPT_fppc_native_vec_elem_order});
+                            options::OPT_fppc_native_vec_elem_order,
+                            options::OPT_ftime_report,
+                            options::OPT_ftime_report_EQ});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/include/flang/Frontend/CompilerInstance.h b/flang/include/flang/Frontend/CompilerInstance.h
index 4fcc59f7cf577b..2844900063a9e5 100644
--- a/flang/include/flang/Frontend/CompilerInstance.h
+++ b/flang/include/flang/Frontend/CompilerInstance.h
@@ -20,6 +20,7 @@
 #include "flang/Parser/provenance.h"
 #include "flang/Semantics/runtime-type-info.h"
 #include "flang/Semantics/semantics.h"
+#include "flang/Support/StringOstream.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -85,6 +86,27 @@ class CompilerInstance {
   /// facilitate this. It is optional and will normally be just a nullptr.
   std::unique_ptr<llvm::raw_pwrite_stream> outputStream;
 
+  /// @name Timing
+  /// Objects needed when timing is enabled.
+  /// @{
+  /// The timing manager.
+  mlir::DefaultTimingManager timingMgr;
+
+  /// The root of the timingScope. This will be reset in @ref executeAction if
+  /// timers have been enabled.
+  mlir::TimingScope timingScopeRoot;
+
+  /// @name Timing stream
+  /// The output streams to capture the timing. Three different streams are
+  /// needed because the timing classes all work slightly differently. We create
+  /// these streams so we have control over when and how the timing is
+  /// displayed. Otherwise, the timing is only displayed when the corresponding
+  /// managers/timers go out of scope.
+  std::unique_ptr<Fortran::support::string_ostream> timingStreamMLIR;
+  std::unique_ptr<Fortran::support::string_ostream> timingStreamLLVM;
+  std::unique_ptr<Fortran::support::string_ostream> timingStreamCodeGen;
+  /// @}
+
 public:
   explicit CompilerInstance();
 
@@ -254,6 +276,43 @@ class CompilerInstance {
   /// Produces the string which represents target feature
   std::string getTargetFeatures();
 
+  /// {
+  /// @name Timing
+  /// @{
+  bool isTimingEnabled() { return timingMgr.isEnabled(); }
+  bool isTimingEnabled() const { return timingMgr.isEnabled(); }
+
+  mlir::DefaultTimingManager &getTimingManager() { return timingMgr; }
+  const mlir::DefaultTimingManager &getTimingManager() const {
+    return timingMgr;
+  }
+
+  mlir::TimingScope &getTimingScopeRoot() { return timingScopeRoot; }
+  const mlir::TimingScope &getTimingScopeRoot() const {
+    return timingScopeRoot;
+  }
+
+  /// Get the timing stream for the MLIR pass manager.
+  llvm::raw_ostream &getTimingStreamMLIR() {
+    assert(timingStreamMLIR && "Timing stream for MLIR was not set");
+    return *timingStreamMLIR;
+  }
+
+  /// Get the timing stream for the new LLVM pass manager.
+  llvm::raw_ostream &getTimingStreamLLVM() {
+    assert(timingStreamLLVM && "Timing stream for LLVM was not set");
+    return *timingStreamLLVM;
+  }
+
+  /// Get the timing stream for the legacy LLVM pass manager.
+  /// NOTE: If the codegen is updated to use the new pass manager, this should
+  /// no longer be needed.
+  llvm::raw_ostream &getTimingStreamCodeGen() {
+    assert(timingStreamCodeGen && "Timing stream for codegen was not set");
+    return *timingStreamCodeGen;
+  }
+  /// @}
+
 private:
   /// Create a new output file
   ///
diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h
index d1646f585cf850..4f848989aa1c80 100644
--- a/flang/include/flang/Frontend/CompilerInvocation.h
+++ b/flang/include/flang/Frontend/CompilerInvocation.h
@@ -21,6 +21,7 @@
 #include "flang/Lower/LoweringOptions.h"
 #include "flang/Parser/parsing.h"
 #include "flang/Semantics/semantics.h"
+#include "mlir/Support/Timing.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "llvm/Option/ArgList.h"
@@ -143,6 +144,14 @@ class CompilerInvocation : public CompilerInvocationBase {
       },
   };
 
+  /// Whether to time the invocation. Set when -ftime-report or -ftime-report=
+  /// is enabled.
+  bool enableTimers;
+
+  /// Whether to report the timing of each run of an LLVM pass. Set when
+  /// -ftime-report=per-pass-run is enabled.
+  bool timeLLVMPassesPerRun;
+
 public:
   CompilerInvocation() = default;
 
@@ -222,6 +231,12 @@ class CompilerInvocation : public CompilerInvocationBase {
     return defaultKinds;
   }
 
+  bool getEnableTimers() { return enableTimers; }
+  bool getEnableTimers() const { return enableTimers; }
+
+  bool getTimeLLVMPassesPerRun() { return timeLLVMPassesPerRun; }
+  bool getTimeLLVMPassesPerRun() const { return timeLLVMPassesPerRun; }
+
   /// Create a compiler invocation from a list of input options.
   /// \returns true on success.
   /// \returns false if an error was encountered while parsing the arguments
diff --git a/flang/include/flang/Support/StringOstream.h b/flang/include/flang/Support/StringOstream.h
new file mode 100644
index 00000000000000..2e5c87eae058c6
--- /dev/null
+++ b/flang/include/flang/Support/StringOstream.h
@@ -0,0 +1,32 @@
+//===-- StringOstream.h - String-backed raw_ostream -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SUPPORT_STRINGOSTREAM_H
+#define FORTRAN_SUPPORT_STRINGOSTREAM_H
+
+#include <llvm/Support/raw_ostream.h>
+
+namespace Fortran::support {
+
+/// Helper class that owns both an llvm::raw_string_ostream and the std::string
+/// buffer it writes into, so the stream and its storage share one lifetime.
+class string_ostream : public llvm::raw_string_ostream {
+private:
+  std::string buf;
+
+public:
+  string_ostream() : llvm::raw_string_ostream(buf) {}
+};
+
+} // namespace Fortran::support
+
+#endif // FORTRAN_SUPPORT_STRINGOSTREAM_H
diff --git a/flang/include/flang/Support/Timing.h b/flang/include/flang/Support/Timing.h
new file mode 100644
index 00000000000000..75ba2a8d85f39f
--- /dev/null
+++ b/flang/include/flang/Support/Timing.h
@@ -0,0 +1,27 @@
+//===- Timing.h - Execution time measurement facilities ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Facilities to measure and provide statistics on execution time.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SUPPORT_TIMING_H
+#define FORTRAN_SUPPORT_TIMING_H
+
+#include "mlir/Support/Timing.h"
+
+namespace Fortran::support {
+
+/// Create a strategy to render the captured times in plain text. This is
+/// intended to be passed to a TimingManager.
+std::unique_ptr<mlir::OutputStrategy> createTimingFormatterText(
+    llvm::raw_ostream &os);
+
+} // namespace Fortran::support
+
+#endif // FORTRAN_SUPPORT_TIMING_H
diff --git a/flang/lib/CMakeLists.txt b/flang/lib/CMakeLists.txt
index f41d4df1f07e3c..2182e845b6a793 100644
--- a/flang/lib/CMakeLists.txt
+++ b/flang/lib/CMakeLists.txt
@@ -4,6 +4,7 @@ add_subdirectory(Decimal)
 add_subdirectory(Lower)
 add_subdirectory(Parser)
 add_subdirectory(Semantics)
+add_subdirectory(Support)
 add_subdirectory(Frontend)
 add_subdirectory(FrontendTool)
 
diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt
index ecdcc73d61ec1f..f679f23b00d2c8 100644
--- a/flang/lib/Frontend/CMakeLists.txt
+++ b/flang/lib/Frontend/CMakeLists.txt
@@ -30,6 +30,7 @@ add_flang_library(flangFrontend
   FortranEvaluate
   FortranCommon
   FortranLower
+  FortranSupport
   FIRDialect
   FIRDialectSupport
   FIRSupport
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index d37430e0e5773e..7cc9cf63aeee1f 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -17,9 +17,12 @@
 #include "flang/Parser/parsing.h"
 #include "flang/Parser/provenance.h"
 #include "flang/Semantics/semantics.h"
+#include "flang/Support/Timing.h"
+#include "mlir/Support/RawOstreamExtras.h"
 #include "clang/Basic/DiagnosticFrontend.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -147,7 +150,7 @@ void CompilerInstance::clearOutputFiles(bool eraseFiles) {
 }
 
 bool CompilerInstance::executeAction(FrontendAction &act) {
-  auto &invoc = this->getInvocation();
+  CompilerInvocation &invoc = this->getInvocation();
 
   llvm::Triple targetTriple{llvm::Triple(invoc.getTargetOpts().triple)};
   if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) {
@@ -167,6 +170,32 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
   // Set options controlling lowering to FIR.
   invoc.setLoweringOptions();
 
+  if (invoc.getEnableTimers()) {
+    // FIXME: Currently, enabling these results in a duplicate registration
+    // error of the "sort-timers" command line option. It is not clear why that
+    // is occurring. Without setting these, we cannot get detailed information
+    // about the runtime of the LLVM IR optimization and code generation passes.
+    // Once the root cause of this is determined, we should enable this to have
+    // behavior that is comparable to clang.
+    // llvm::TimePassesIsEnabled = true;
+    // llvm::TimePassesPerRun = invoc.getTimeLLVMPassesPerRun();
+
+    timingStreamMLIR = std::make_unique<Fortran::support::string_ostream>();
+    timingStreamLLVM = std::make_unique<Fortran::support::string_ostream>();
+    timingStreamCodeGen = std::make_unique<Fortran::support::string_ostream>();
+
+    timingMgr.setEnabled(true);
+    timingMgr.setDisplayMode(mlir::DefaultTimingManager::DisplayMode::Tree);
+    timingMgr.setOutput(
+        Fortran::support::createTimingFormatterText(*timingStreamMLIR));
+
+    // Creating a new TimingScope will automatically start the timer. Since this
+    // is the top-level timer, this is ok because it will end up capturing the
+    // time for all the bookkeeping and other tasks that take place between
+    // parsing, lowering etc. for which finer-grained timers will be created.
+    timingScopeRoot = timingMgr.getRootScope();
+  }
+
   // Run the frontend action `act` for every input file.
   for (const FrontendInputFile &fif : getFrontendOpts().inputs) {
     if (act.beginSourceFile(*this, fif)) {
@@ -176,6 +205,26 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
       act.endSourceFile();
     }
   }
+
+  if (timingMgr.isEnabled()) {
+    timingScopeRoot.stop();
+
+    // Write the timings to the associated output stream and clear all timers.
+    // We need to provide another stream because the TimingManager will attempt
+    // to print in its destructor even if it has been cleared. By the time that
+    // destructor runs, the output streams will have been destroyed, so give it
+    // a null stream.
+    timingMgr.print();
+    timingMgr.setOutput(
+        Fortran::support::createTimingFormatterText(mlir::thread_safe_nulls()));
+
+    // This is deliberately done in "reverse" order and does not match the
+    // behavior of clang.
+    llvm::errs() << timingStreamCodeGen->str() << "\n";
+    llvm::errs() << timingStreamLLVM->str() << "\n";
+    llvm::errs() << timingStreamMLIR->str() << "\n";
+  }
+
   return !getDiagnostics().getClient()->getNumErrors();
 }
 
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 1d73397d330178..8607f6316d3167 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1358,6 +1358,24 @@ bool CompilerInvocation::createFromArgs(
     }
   }
 
+  // Process the timing-related options.
+  if (const llvm::opt::Arg *a =
+          args.getLastArg(clang::driver::options::OPT_ftime_report,
+                          clang::driver::options::OPT_ftime_report_EQ)) {
+    invoc.enableTimers = true;
+    if (a->getOption().getID() == clang::driver::options::OPT_ftime_report_EQ) {
+      llvm::StringRef val = a->getValue();
+      if (val == "per-pass") {
+        invoc.timeLLVMPassesPerRun = false;
+      } else if (val == "per-pass-run") {
+        invoc.timeLLVMPassesPerRun = true;
+      } else {
+        diags.Report(clang::diag::err_drv_invalid_value)
+            << a->getAsString(args) << a->getValue();
+      }
+    }
+  }
+
   invoc.setArgv0(argv0);
 
   return success;
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 5c86bd947ce73f..4b67c17f761686 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -81,6 +81,16 @@
 
 using namespace Fortran::frontend;
 
+static constexpr llvm::StringLiteral timingIdParse = "Parse";
+static constexpr llvm::StringLiteral timingIdMLIRGen = "MLIR generation";
+static constexpr llvm::StringLiteral timingIdMLIRPasses =
+    "MLIR translation/optimization";
+static constexpr llvm::StringLiteral timingIdLLVMIRGen = "LLVM IR generation";
+static constexpr llvm::StringLiteral timingIdLLVMIRPasses =
+    "LLVM IR optimizations";
+static constexpr llvm::StringLiteral timingIdBackend =
+    "Assembly/Object code generation";
+
 // Declare plugin extension function declarations.
 #define HANDLE_EXTENSION(Ext)                                                  \
   llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
@@ -224,6 +234,14 @@ static void addAMDGPUSpecificMLIRItems(mlir::ModuleOp &mlirModule,
 bool CodeGenAction::beginSourceFileAction() {
   llvmCtx = std::make_unique<llvm::LLVMContext>();
   CompilerInstance &ci = this->getInstance();
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
+
+  // This will provide timing information even when the input is an LLVM IR or
+  // MLIR file. That is fine because those do have to be parsed, so the label
+  // is still accurate.
+  mlir::TimingScope timingScopeParse = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdParse, timingMgr));
 
   // If the input is an LLVM file, just parse it and return.
   if (this->getCurrentInput().getKind().getLanguage() == Language::LLVM_IR) {
@@ -285,6 +303,10 @@ bool CodeGenAction::beginSourceFileAction() {
   if (!res)
     return res;
 
+  timingScopeParse.stop();
+  mlir::TimingScope timingScopeMLIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRGen, timingMgr));
+
   // Create a LoweringBridge
   const common::IntrinsicTypeDefaultKinds &defKinds =
       ci.getSemanticsContext().defaultKinds();
@@ -319,6 +341,7 @@ bool CodeGenAction::beginSourceFileAction() {
   // constants etc.
   addDependentLibs(*mlirModule, ci);
   addAMDGPUSpecificMLIRItems(*mlirModule, ci);
+  timingScopeMLIRGen.stop();
 
   // run the default passes.
   mlir::PassManager pm((*mlirModule)->getName(),
@@ -340,6 +363,7 @@ bool CodeGenAction::beginSourceFileAction() {
 
   pm.enableVerifier(/*verifyPasses=*/true);
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
+  pm.enableTiming(timingScopeMLIRGen);
 
   if (mlir::failed(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
@@ -348,6 +372,7 @@ bool CodeGenAction::beginSourceFileAction() {
     ci.getDiagnostics().Report(diagID);
     return false;
   }
+  timingScopeMLIRGen.stop();
 
   // Print initial full MLIR module, before lowering or transformations, if
   // -save-temps has been specified.
@@ -698,8 +723,10 @@ void CodeGenAction::lowerHLFIRToFIR() {
   assert(mlirModule && "The MLIR module has not been generated yet.");
 
   CompilerInstance &ci = this->getInstance();
-  auto opts = ci.getInvocation().getCodeGenOpts();
+  const CodeGenOptions &opts = ci.getInvocation().getCodeGenOpts();
   llvm::OptimizationLevel level = mapToLevel(opts);
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   fir::support::loadDialects(*mlirCtx);
 
@@ -714,6 +741,9 @@ void CodeGenAction::lowerHLFIRToFIR() {
   fir::createHLFIRToFIRPassPipeline(pm, level);
   (void)mlir::applyPassManagerCLOptions(pm);
 
+  mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRPasses, timingMgr));
+  pm.enableTiming(timingScopeMLIRPasses);
   if (!mlir::succeeded(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
         clang::DiagnosticsEngine::Error, "Lowering to FIR failed");
@@ -798,9 +828,12 @@ void CodeGenAction::generateLLVMIR() {
   assert(mlirModule && "The MLIR module has not been generated yet.");
 
   CompilerInstance &ci = this->getInstance();
-  auto opts = ci.getInvocation().getCodeGenOpts();
-  auto mathOpts = ci.getInvocation().getLoweringOpts().getMathOptions();
+  CompilerInvocation &invoc = ci.getInvocation();
+  const CodeGenOptions &opts = invoc.getCodeGenOpts();
+  const auto &mathOpts = invoc.getLoweringOpts().getMathOptions();
   llvm::OptimizationLevel level = mapToLevel(opts);
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   fir::support::loadDialects(*mlirCtx);
   mlir::DialectRegistry registry;
@@ -832,11 +865,15 @@ void CodeGenAction::generateLLVMIR() {
   (void)mlir::applyPassManagerCLOptions(pm);
 
   // run the pass manager
+  mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRPasses, timingMgr));
+  pm.enableTiming(timingScopeMLIRPasses);
   if (!mlir::succeeded(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
         clang::DiagnosticsEngine::Error, "Lowering to LLVM IR failed");
     ci.getDiagnostics().Report(diagID);
   }
+  timingScopeMLIRPasses.stop();
 
   // Print final MLIR module, just before translation into LLVM IR, if
   // -save-temps has been specified.
@@ -849,6 +886,8 @@ void CodeGenAction::generateLLVMIR() {
   }
 
   // Translate to LLVM IR
+  mlir::TimingScope timingScopeLLVMIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRGen, timingMgr));
   std::optional<llvm::StringRef> moduleName = mlirModule->getName();
   llvmModule = mlir::translateModuleToLLVMIR(
       *mlirModule, *llvmCtx, moduleName ? *moduleName : "FIRModule");
@@ -952,11 +991,12 @@ static void generateMachineCodeOrAssemblyImpl(clang::DiagnosticsEngine &diags,
 }
 
 void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
-  auto opts = getInstance().getInvocation().getCodeGenOpts();
-  auto &diags = getInstance().getDiagnostics();
+  CompilerInstance &ci = getInstance();
+  const CodeGenOptions &opts = ci.getInvocation().getCodeGenOpts();
+  clang::DiagnosticsEngine &diags = ci.getDiagnostics();
   llvm::OptimizationLevel level = mapToLevel(opts);
 
-  llvm::TargetMachine *targetMachine = &getInstance().getTargetMachine();
+  llvm::TargetMachine *targetMachine = &ci.getTargetMachine();
   // Create the analysis managers.
   llvm::LoopAnalysisManager lam;
   llvm::FunctionAnalysisManager fam;
@@ -970,6 +1010,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
   llvm::StandardInstrumentations si(llvmModule->getContext(),
                                     opts.DebugPassManager);
   si.registerCallbacks(pic, &mam);
+  if (ci.isTimingEnabled())
+    si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
 
   // Attempt to load pass plugins and register their callbacks with PB.
@@ -1017,6 +1059,10 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
 
   // Run the passes.
   mpm.run(*llvmModule, mam);
+
+  // Print the timers to the associated output stream and reset them.
+  if (ci.isTimingEnabled())
+    si.getTimePasses().print();
 }
 
 // This class handles optimization remark messages requested if
@@ -1238,6 +1284,8 @@ void CodeGenAction::executeAction() {
   const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
   Fortran::lower::LoweringOptions &loweringOpts =
       ci.getInvocation().getLoweringOpts();
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   // If the output stream is a file, generate it and define the corresponding
   // output stream. If a pre-defined output stream is available, we will use
@@ -1283,6 +1331,11 @@ void CodeGenAction::executeAction() {
   if (!llvmModule)
     generateLLVMIR();
 
+  // This will already have been started in generateLLVMIR(). But we need to
+  // continue operating on the module, so we continue timing it.
+  mlir::TimingScope timingScopeLLVMIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRGen, timingMgr));
+
   // If generating the LLVM module failed, abort! No need for further error
   // reporting since generateLLVMIR() does this already.
   if (!llvmModule)
@@ -1312,6 +1365,7 @@ void CodeGenAction::executeAction() {
   // Embed offload objects specified with -fembed-offload-object
   if (!codeGenOpts.OffloadObjects.empty())
     embedOffloadObjects();
+  timingScopeLLVMIRGen.stop();
 
   BackendRemarkConsumer remarkConsumer(diags, codeGenOpts);
 
@@ -1340,7 +1394,10 @@ void CodeGenAction::executeAction() {
   }
 
   // Run LLVM's middle-end (i.e. the optimizer).
+  mlir::TimingScope timingScopeLLVMIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRPasses, timingMgr));
   runOptimizationPipeline(ci.isOutputStreamNull() ? *os : ci.getOutputStream());
+  timingScopeLLVMIRPasses.stop();
 
   if (action == BackendActionTy::Backend_EmitLL ||
       action == BackendActionTy::Backend_EmitBC) {
@@ -1349,11 +1406,15 @@ void CodeGenAction::executeAction() {
   }
 
   // Run LLVM's backend and generate either assembly or machine code
+  mlir::TimingScope timingScopeBackend = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdBackend, timingMgr));
   if (action == BackendActionTy::Backend_EmitAssembly ||
       action == BackendActionTy::Backend_EmitObj) {
     generateMachineCodeOrAssemblyImpl(
         diags, targetMachine, action, *llvmModule, codeGenOpts,
         ci.isOutputStreamNull() ? *os : ci.getOutputStream());
+    if (timingMgr.isEnabled())
+      llvm::reportAndResetTimings(&ci.getTimingStreamCodeGen());
     return;
   }
 }
diff --git a/flang/lib/Support/CMakeLists.txt b/flang/lib/Support/CMakeLists.txt
new file mode 100644
index 00000000000000..9c7887aecafbd6
--- /dev/null
+++ b/flang/lib/Support/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_flang_library(FortranSupport
+  Timing.cpp
+
+  LINK_LIBS
+  MLIRSupport
+
+  LINK_COMPONENTS
+  Support
+)
diff --git a/flang/lib/Support/Timing.cpp b/flang/lib/Support/Timing.cpp
new file mode 100644
index 00000000000000..ee8309a950eec8
--- /dev/null
+++ b/flang/lib/Support/Timing.cpp
@@ -0,0 +1,67 @@
+//===- Timing.cpp - Execution time measurement facilities -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Facilities to measure and provide statistics on execution time.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Support/Timing.h"
+#include "llvm/Support/Format.h"
+
+class OutputStrategyText : public mlir::OutputStrategy {
+protected:
+  static constexpr llvm::StringLiteral header = "Flang execution timing report";
+
+public:
+  OutputStrategyText(llvm::raw_ostream &os) : mlir::OutputStrategy(os) {}
+
+  void printHeader(const mlir::TimeRecord &total) override {
+    // Compute the left padding that centers the header in 80 columns.
+    unsigned padding = (80 - header.size()) / 2;
+    os << "===" << std::string(73, '-') << "===\n";
+    os.indent(padding) << header << '\n';
+    os << "===" << std::string(73, '-') << "===\n";
+
+    // Print the total time followed by the section headers.
+    os << llvm::format("  Total Execution Time: %.4f seconds\n\n", total.wall);
+    os << "  ----User Time----  ----Wall Time----  ----Name----\n";
+  }
+
+  void printFooter() override { os.flush(); }
+
+  void printTime(
+      const mlir::TimeRecord &time, const mlir::TimeRecord &total) override {
+    os << llvm::format(
+        "  %8.4f (%5.1f%%)", time.user, 100.0 * time.user / total.user);
+    os << llvm::format(
+        "  %8.4f (%5.1f%%)  ", time.wall, 100.0 * time.wall / total.wall);
+  }
+
+  void printListEntry(llvm::StringRef name, const mlir::TimeRecord &time,
+      const mlir::TimeRecord &total, bool lastEntry) override {
+    printTime(time, total);
+    os << name << "\n";
+  }
+
+  void printTreeEntry(unsigned indent, llvm::StringRef name,
+      const mlir::TimeRecord &time, const mlir::TimeRecord &total) override {
+    printTime(time, total);
+    os.indent(indent) << name << "\n";
+  }
+
+  void printTreeEntryEnd(unsigned indent, bool lastEntry) override {}
+};
+
+namespace Fortran::support {
+
+std::unique_ptr<mlir::OutputStrategy> createTimingFormatterText(
+    llvm::raw_ostream &os) {
+  return std::make_unique<OutputStrategyText>(os);
+}
+
+} // namespace Fortran::support
diff --git a/flang/test/Driver/time-report-eq.f90 b/flang/test/Driver/time-report-eq.f90
new file mode 100644
index 00000000000000..176afcaa2b9424
--- /dev/null
+++ b/flang/test/Driver/time-report-eq.f90
@@ -0,0 +1,18 @@
+! Check that -ftime-report flag is passed as-is to fc1. The value of the flag
+! is only checked there. This behavior intentionally mirrors that of clang.
+!
+! -ftime-report= is currently not supported because we do not support detailed
+! timing information on the LLVM IR optimization and code generation passes.
+! When that is supported, these can be re-enabled.
+!
+! XFAIL: *
+!
+! RUN: %flang -### -c -ftime-report=per-pass %s 2>&1 | FileCheck %s -check-prefix=PER-PASS
+! RUN: %flang -### -c -ftime-report=per-pass-run %s 2>&1 | FileCheck %s -check-prefix=PER-PASS-INVOKE
+! RUN: %flang -### -c -ftime-report=unknown %s 2>&1 | FileCheck %s -check-prefix=UNKNOWN
+
+! PER-PASS: "-ftime-report=per-pass"
+! PER-PASS-INVOKE: "-ftime-report=per-pass-run"
+! UNKNOWN: "-ftime-report=unknown"
+
+end program
diff --git a/flang/test/Driver/time-report.f90 b/flang/test/Driver/time-report.f90
new file mode 100644
index 00000000000000..3f6e1e9f87d9a1
--- /dev/null
+++ b/flang/test/Driver/time-report.f90
@@ -0,0 +1,22 @@
+! Check that -ftime-report flag is passed as-is to fc1. The value of the flag
+! is only checked there. This behavior intentionally mirrors that of clang.
+!
+! RUN: %flang -### -c -ftime-report %s 2>&1 | FileCheck %s --check-prefix=CHECK-DRIVER
+
+! TODO: Currently, detailed timing of LLVM IR optimization and code generation
+! passes is not supported. When that is done, add more checks here to make sure
+! the output is as expected.
+
+! RUN: %flang -c -ftime-report -O0 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMMON
+! RUN: %flang -c -ftime-report -O1 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMMON
+
+! CHECK-DRIVER: "-ftime-report"
+
+! CHECK-COMMON: Flang execution timing report
+! CHECK-COMMON: MLIR generation
+! CHECK-COMMON: MLIR translation/optimization
+! CHECK-COMMON: LLVM IR generation
+! CHECK-COMMON: LLVM IR optimizations
+! CHECK-COMMON: Assembly/Object code generation
+
+end program

>From fdf393449da5e3c2c9d702ba9c52ba1da67ed69e Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 4 Sep 2024 11:07:40 -0600
Subject: [PATCH 2/3] Run clang-format

---
 clang/lib/Driver/ToolChains/Flang.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c1e724d7761dc0..2a0827d03d0479 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -146,13 +146,13 @@ void Flang::addCodegenOptions(const ArgList &Args,
   if (shouldLoopVersion(Args))
     CmdArgs.push_back("-fversion-loops-for-stride");
 
-  Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
-                            options::OPT_flang_deprecated_no_hlfir,
-                            options::OPT_flang_experimental_integer_overflow,
-                            options::OPT_fno_ppc_native_vec_elem_order,
-                            options::OPT_fppc_native_vec_elem_order,
-                            options::OPT_ftime_report,
-                            options::OPT_ftime_report_EQ});
+  Args.addAllArgs(CmdArgs,
+                  {options::OPT_flang_experimental_hlfir,
+                   options::OPT_flang_deprecated_no_hlfir,
+                   options::OPT_flang_experimental_integer_overflow,
+                   options::OPT_fno_ppc_native_vec_elem_order,
+                   options::OPT_fppc_native_vec_elem_order,
+                   options::OPT_ftime_report, options::OPT_ftime_report_EQ});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {

>From 3a1046be46bfd2f4297c12016ea3f0216b842750 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Thu, 5 Sep 2024 09:03:48 -0600
Subject: [PATCH 3/3] Address reviewer comments. Remove code specific to
 -ftime-report= since that is not supported right now anyway.

Only print a timing stream if it is non-empty, so that no stray empty lines
are emitted.
---
 .../include/flang/Frontend/CompilerInstance.h |  1 -
 .../flang/Frontend/CompilerInvocation.h       |  7 -------
 flang/lib/Frontend/CompilerInstance.cpp       | 19 +++++++++++++------
 flang/lib/Frontend/CompilerInvocation.cpp     | 14 +-------------
 flang/test/Driver/time-report-eq.f90          | 18 ------------------
 5 files changed, 14 insertions(+), 45 deletions(-)
 delete mode 100644 flang/test/Driver/time-report-eq.f90

diff --git a/flang/include/flang/Frontend/CompilerInstance.h b/flang/include/flang/Frontend/CompilerInstance.h
index 2844900063a9e5..509c9f4b9e91aa 100644
--- a/flang/include/flang/Frontend/CompilerInstance.h
+++ b/flang/include/flang/Frontend/CompilerInstance.h
@@ -279,7 +279,6 @@ class CompilerInstance {
   /// {
   /// @name Timing
   /// @{
-  bool isTimingEnabled() { return timingMgr.isEnabled(); }
   bool isTimingEnabled() const { return timingMgr.isEnabled(); }
 
   mlir::DefaultTimingManager &getTimingManager() { return timingMgr; }
diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h
index 4f848989aa1c80..cd632d251ba251 100644
--- a/flang/include/flang/Frontend/CompilerInvocation.h
+++ b/flang/include/flang/Frontend/CompilerInvocation.h
@@ -148,10 +148,6 @@ class CompilerInvocation : public CompilerInvocationBase {
   /// is enabled.
   bool enableTimers;
 
-  /// Whether to report the timing of each run of an LLVM pass. Set when
-  /// -ftime-report=per-pass-run is enabled.
-  bool timeLLVMPassesPerRun;
-
 public:
   CompilerInvocation() = default;
 
@@ -234,9 +230,6 @@ class CompilerInvocation : public CompilerInvocationBase {
   bool getEnableTimers() { return enableTimers; }
   bool getEnableTimers() const { return enableTimers; }
 
-  bool getTimeLLVMPassesPerRun() { return timeLLVMPassesPerRun; }
-  bool getTimeLLVMPassesPerRun() const { return timeLLVMPassesPerRun; }
-
   /// Create a compiler invocation from a list of input options.
   /// \returns true on success.
   /// \returns false if an error was encountered while parsing the arguments
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index 7cc9cf63aeee1f..96348c716269d6 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -178,7 +178,6 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
     // Once the root cause of this is determined, we should enable this to have
     // behavior that is comparable to clang.
     // llvm::TimePassesIsEnabled = true;
-    // llvm::TimePassesPerRun = invoc.getTimeLLVMPassesPerRun();
 
     timingStreamMLIR = std::make_unique<Fortran::support::string_ostream>();
     timingStreamLLVM = std::make_unique<Fortran::support::string_ostream>();
@@ -218,11 +217,19 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
     timingMgr.setOutput(
         Fortran::support::createTimingFormatterText(mlir::thread_safe_nulls()));
 
-    // This is deliberately done in "reverse" order and does not match the
-    // behavior of clang.
-    llvm::errs() << timingStreamCodeGen->str() << "\n";
-    llvm::errs() << timingStreamLLVM->str() << "\n";
-    llvm::errs() << timingStreamMLIR->str() << "\n";
+    // This prints the timings in "reverse" order, starting from code
+    // generation, followed by LLVM-IR optimizations, then MLIR optimizations
+    // and transformations, and the frontend. If any of the steps is disabled,
+    // for instance because code generation was not performed, the
+    // corresponding string will be empty and nothing is printed for it.
+    if (!timingStreamCodeGen->str().empty())
+      llvm::errs() << timingStreamCodeGen->str() << "\n";
+
+    if (!timingStreamLLVM->str().empty())
+      llvm::errs() << timingStreamLLVM->str() << "\n";
+
+    if (!timingStreamMLIR->str().empty())
+      llvm::errs() << timingStreamMLIR->str() << "\n";
   }
 
   return !getDiagnostics().getClient()->getNumErrors();
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 8607f6316d3167..f92a30157cb6cd 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1360,20 +1360,8 @@ bool CompilerInvocation::createFromArgs(
 
   // Process the timing-related options.
   if (const llvm::opt::Arg *a =
-          args.getLastArg(clang::driver::options::OPT_ftime_report,
-                          clang::driver::options::OPT_ftime_report_EQ)) {
+          args.getLastArg(clang::driver::options::OPT_ftime_report)) {
     invoc.enableTimers = true;
-    if (a->getOption().getID() == clang::driver::options::OPT_ftime_report_EQ) {
-      llvm::StringRef val = a->getValue();
-      if (val == "per-pass") {
-        invoc.timeLLVMPassesPerRun = false;
-      } else if (val == "per-pass-run") {
-        invoc.timeLLVMPassesPerRun = true;
-      } else {
-        diags.Report(clang::diag::err_drv_invalid_value)
-            << a->getAsString(args) << a->getValue();
-      }
-    }
   }
 
   invoc.setArgv0(argv0);
diff --git a/flang/test/Driver/time-report-eq.f90 b/flang/test/Driver/time-report-eq.f90
deleted file mode 100644
index 176afcaa2b9424..00000000000000
--- a/flang/test/Driver/time-report-eq.f90
+++ /dev/null
@@ -1,18 +0,0 @@
-! Check that -ftime-report flag is passed as-is to fc1. The value of the flag
-! is only checked there. This behavior intentionally mirrors that of clang.
-!
-! -ftime-report= is currently not supported because we do not support detailed
-! timing information on the LLVM IR optimization and code generation passes.
-! When that is supported, these can be re-enabled.
-!
-! XFAIL: *
-!
-! RUN: %flang -### -c -ftime-report=per-pass %s 2>&1 | FileCheck %s -check-prefix=PER-PASS
-! RUN: %flang -### -c -ftime-report=per-pass-run %s 2>&1 | FileCheck %s -check-prefix=PER-PASS-INVOKE
-! RUN: %flang -### -c -ftime-report=unknown %s 2>&1 | FileCheck %s -check-prefix=UNKNOWN
-
-! PER-PASS: "-ftime-report=per-pass"
-! PER-PASS-INVOKE: "-ftime-report=per-pass-run"
-! UNKNOWN: "-ftime-report=unknown"
-
-end program



More information about the flang-commits mailing list