[clang] [llvm] [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED (PR #79942)

Amir Ayupov via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 25 16:07:29 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/79942

From a3a8054675f295c15e7a7e45f7aebd2223072964 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 19 Jul 2023 20:30:29 -0700
Subject: [PATCH] [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED

Build on the Clang-BOLT infrastructure to collect sample profiles for CSSPGO.
Add clang/cmake/caches/CSSPGO.cmake to automate the CSSPGO Clang build, and
clang/cmake/caches/BOLT-CSSPGO.cmake to select it from the BOLT-PGO cache.

Differential Revision: https://reviews.llvm.org/D155419
---
 clang/CMakeLists.txt                       | 15 +++++-
 clang/cmake/caches/BOLT-CSSPGO.cmake       |  3 ++
 clang/cmake/caches/BOLT-PGO.cmake          |  3 +-
 clang/cmake/caches/CSSPGO.cmake            |  2 +
 clang/utils/perf-training/CMakeLists.txt   | 36 +++++++++++--
 clang/utils/perf-training/perf-helper.py   | 60 +++++++++++++++++-----
 llvm/CMakeLists.txt                        |  3 ++
 llvm/cmake/modules/HandleLLVMOptions.cmake | 26 +++++++++-
 8 files changed, 127 insertions(+), 21 deletions(-)
 create mode 100644 clang/cmake/caches/BOLT-CSSPGO.cmake
 create mode 100644 clang/cmake/caches/CSSPGO.cmake

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 94ce5968beb75e..644db0f846000f 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -744,11 +744,22 @@ if (CLANG_ENABLE_BOOTSTRAP)
   if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
     add_dependencies(clang-bootstrap-deps llvm-profdata)
     set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
+    string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
+    if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+      add_dependencies(clang-bootstrap-deps llvm-profgen)
+      list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen)
+    endif()
   endif()
 
   if(LLVM_BUILD_INSTRUMENTED)
-    add_dependencies(clang-bootstrap-deps generate-profdata)
-    set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+    string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
+    if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+      add_dependencies(clang-bootstrap-deps generate-sprofdata)
+      set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata)
+    else()
+      add_dependencies(clang-bootstrap-deps generate-profdata)
+      set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+    endif()
     # Use the current tools for LTO instead of the instrumented ones
     list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
       CMAKE_CXX_COMPILER
diff --git a/clang/cmake/caches/BOLT-CSSPGO.cmake b/clang/cmake/caches/BOLT-CSSPGO.cmake
new file mode 100644
index 00000000000000..b1c204ad57ac58
--- /dev/null
+++ b/clang/cmake/caches/BOLT-CSSPGO.cmake
@@ -0,0 +1,3 @@
+set(BOLT_PGO_CMAKE_CACHE "CSSPGO" CACHE STRING "")
+set(BOOTSTRAP_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/BOLT-PGO.cmake)
diff --git a/clang/cmake/caches/BOLT-PGO.cmake b/clang/cmake/caches/BOLT-PGO.cmake
index 1a04ca9a74e5e3..cc9410fd0e95c7 100644
--- a/clang/cmake/caches/BOLT-PGO.cmake
+++ b/clang/cmake/caches/BOLT-PGO.cmake
@@ -1,3 +1,4 @@
+set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "")
 set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
 
 set(CLANG_BOOTSTRAP_TARGETS
@@ -14,4 +15,4 @@ set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
 set(PGO_BUILD_CONFIGURATION
   ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
   CACHE STRING "")
-include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake)
diff --git a/clang/cmake/caches/CSSPGO.cmake b/clang/cmake/caches/CSSPGO.cmake
new file mode 100644
index 00000000000000..59e08a64f8aad8
--- /dev/null
+++ b/clang/cmake/caches/CSSPGO.cmake
@@ -0,0 +1,2 @@
+set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index 49673790ff6e84..45a2b475dd1a70 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -6,6 +6,10 @@ set(CLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH
 set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating pgo data")
 set(CLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.")
 
+add_custom_target(clear-perf-data
+  COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
+  COMMENT "Clearing old perf data")
+
 if(LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
@@ -15,7 +19,7 @@ if(LLVM_BUILD_INSTRUMENTED)
   add_lit_testsuite(generate-profraw "Generating clang PGO data"
     ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
     EXCLUDE_FROM_CHECK_ALL
-    DEPENDS clear-profraw
+    DEPENDS clear-profraw clang
     )
 
   add_custom_target(clear-profraw
@@ -49,6 +53,32 @@ if(LLVM_BUILD_INSTRUMENTED)
               USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw)
       add_dependencies(generate-profdata generate-profraw-external)
     endif()
+
+    if(NOT LLVM_PROFGEN)
+      find_program(LLVM_PROFGEN llvm-profgen)
+    endif()
+
+    if(NOT LLVM_PROFGEN)
+      message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen")
+    elseif(NOT CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
+      message(STATUS "CLANG_PGO_TRAINING_DATA_SOURCE_DIR must be set to collect CSSPGO samples")
+    else()
+      set(PERF_HELPER "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py)
+      set(CLANG_SPROFDATA ${CMAKE_CURRENT_BINARY_DIR}/clang.sprofdata)
+      add_custom_command(
+        OUTPUT ${CLANG_SPROFDATA}
+        # Execute generate-profraw-external under perf
+        COMMAND ${PERF_HELPER} perf --csspgo -- ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target generate-profraw-external
+        # Convert each perf.data file into a sample profile (<file>.profraw) using llvm-profgen
+        COMMAND ${PERF_HELPER} perf2prof ${LLVM_PROFGEN} $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR}
+        # Merge the converted sample profiles into clang.sprofdata using llvm-profdata
+        COMMAND ${PERF_HELPER} merge --sample ${LLVM_PROFDATA} ${CLANG_SPROFDATA} ${CMAKE_CURRENT_BINARY_DIR}
+        DEPENDS clang ${CLANG_PGO_TRAINING_DEPS} clear-perf-data generate-profraw-external-clean
+        VERBATIM
+        USES_TERMINAL
+      )
+      add_custom_target(generate-sprofdata DEPENDS ${CLANG_SPROFDATA})
+    endif()
   endif()
 endif()
 
@@ -100,10 +130,6 @@ if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
     COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
     COMMENT "Clearing old BOLT fdata")
 
-  add_custom_target(clear-perf-data
-    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
-    COMMENT "Clearing old perf data")
-
   string(TOUPPER "${CLANG_BOLT}" CLANG_BOLT)
   if (CLANG_BOLT STREQUAL "LBR")
     set(BOLT_LBR "--lbr")
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 3ed42a187fd803..a57fa8d21657e9 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -36,21 +36,29 @@ def clean(args):
             + "\tRemoves all files with extension from <path>."
         )
         return 1
-    for path in args[1:-1]:
+    for path in args[:-1]:
         for filename in findFilesWithExtension(path, args[-1]):
             os.remove(filename)
     return 0
 
 
 def merge(args):
-    if len(args) < 3:
-        print(
-            "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
-            + "\tMerges all profraw files from path into output."
-        )
-        return 1
-    cmd = [args[0], "merge", "-o", args[1]]
-    for path in args[2:]:
+    parser = argparse.ArgumentParser(
+        prog="perf-helper merge",
+        description="Merges all profraw files from path(s) into output",
+    )
+    parser.add_argument("profdata", help="Path to llvm-profdata tool")
+    parser.add_argument("output", help="Output filename")
+    parser.add_argument(
+        "paths", nargs="+", help="Folder(s) containing input profraw files"
+    )
+    parser.add_argument("--sample", action="store_true", help="Sample profile")
+    opts = parser.parse_args(args)
+
+    cmd = [opts.profdata, "merge", "-o", opts.output]
+    if opts.sample:
+        cmd += ["--sample"]
+    for path in opts.paths:
         cmd.extend(findFilesWithExtension(path, "profraw"))
     subprocess.check_call(cmd)
     return 0
@@ -71,25 +79,32 @@ def merge_fdata(args):
 
 def perf(args):
     parser = argparse.ArgumentParser(
-        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+        prog="perf-helper perf",
+        description="perf wrapper for BOLT/CSSPGO profile collection",
     )
     parser.add_argument(
         "--lbr", action="store_true", help="Use perf with branch stacks"
     )
+    parser.add_argument(
+        "--csspgo", action="store_true", help="Enable CSSPGO flags"
+    )
     parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
 
     opts = parser.parse_args(args)
     cmd = opts.cmd[1:]
 
+    event = "br_inst_retired.near_taken:uppp" if opts.csspgo else "cycles:u"
     perf_args = [
         "perf",
         "record",
-        "--event=cycles:u",
+        f"--event={event}",
         "--freq=max",
         "--output=%d.perf.data" % os.getpid(),
     ]
-    if opts.lbr:
+    if opts.lbr or opts.csspgo:
         perf_args += ["--branch-filter=any,u"]
+    if opts.csspgo:
+        perf_args += ["-g", "--call-graph=fp"]
     perf_args.extend(cmd)
 
     start_time = time.time()
@@ -125,6 +140,26 @@ def perf2bolt(args):
     return 0
 
 
+def perf2prof(args):
+    parser = argparse.ArgumentParser(
+        prog="perf-helper perf2prof",
+        description="perf to CSSPGO prof conversion wrapper",
+    )
+    parser.add_argument("profgen", help="Path to llvm-profgen binary")
+    parser.add_argument("binary", help="Input binary")
+    parser.add_argument("paths", nargs="+", help="Path containing perf.data files")
+    opts = parser.parse_args(args)
+
+    profgen_args = [opts.profgen, f"--binary={opts.binary}"]
+    for path in opts.paths:
+        for filename in findFilesWithExtension(path, "perf.data"):
+            subprocess.check_call(
+                profgen_args
+                + [f"--perfdata={filename}", f"--output={filename}.profraw"]
+            )
+    return 0
+
+
 def dtrace(args):
     parser = argparse.ArgumentParser(
         prog="perf-helper dtrace",
@@ -567,6 +602,7 @@ def genOrderFile(args):
     "merge-fdata": merge_fdata,
     "perf": perf,
     "perf2bolt": perf2bolt,
+    "perf2prof": perf2prof,
 }
 
 
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index c637febce1c1fe..a311764d90b17a 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -948,6 +948,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa
 set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
   "Profiling data file to use when compiling in order to improve runtime performance.")
 
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+  "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
 if(LLVM_INCLUDE_TESTS)
   # All LLVM Python files should be compatible down to this minimum version.
   set(LLVM_MINIMUM_PYTHON_VERSION 3.8)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index ed13a82905b4e3..90e06ef9f3887b 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1134,7 +1134,7 @@ endif()
 option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
 mark_as_advanced(LLVM_ENABLE_IR_PGO)
 
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
 set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
 string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1167,6 +1167,15 @@ if (LLVM_BUILD_INSTRUMENTED)
         CMAKE_EXE_LINKER_FLAGS
         CMAKE_SHARED_LINKER_FLAGS)
     endif()
+  elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+    append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+      CMAKE_CXX_FLAGS
+      CMAKE_C_FLAGS)
+    if(NOT LINKER_IS_LLD_LINK)
+      append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+        CMAKE_EXE_LINKER_FLAGS
+        CMAKE_SHARED_LINKER_FLAGS)
+    endif()
   else()
     append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
       CMAKE_CXX_FLAGS
@@ -1217,6 +1226,21 @@ if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE})
   endif()
 endif()
 
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+    append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+      CMAKE_CXX_FLAGS
+      CMAKE_C_FLAGS)
+    if(NOT LINKER_IS_LLD_LINK)
+      append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+        CMAKE_EXE_LINKER_FLAGS
+        CMAKE_SHARED_LINKER_FLAGS)
+    endif()
+  else()
+    message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
 option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
 mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)



More information about the cfe-commits mailing list