[clang] 76b2915 - [Clang][CMake] Use perf-training for Clang-BOLT

Amir Ayupov via cfe-commits cfe-commits at lists.llvm.org
Sat May 13 10:36:35 PDT 2023


Author: Amir Ayupov
Date: 2023-05-13T10:36:29-07:00
New Revision: 76b2915fdbbba18693c9aabda419768f41106f31

URL: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31
DIFF: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31.diff

LOG: [Clang][CMake] Use perf-training for Clang-BOLT

Leverage perf-training flow for BOLT profile collection, enabling reproducible
BOLT optimization. Remove the use of bootstrapped build for profile collection.

Test Plan:
- Regular (single-stage) build
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake
$ ninja clang-bolt
...
[21/24] Instrumenting clang binary with BOLT
[21/24] Generating BOLT profile for Clang
[23/24] Merging BOLT fdata
Profile from 2 files merged.
[24/24] Optimizing Clang with BOLT
...
          1291202496 : executed instructions (-1.1%)
            27005133 : taken branches (-71.5%)
...
```
- Two stage build (ThinLTO+InstPGO)
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake -C .../clang/cmake/caches/BOLT-PGO.cmake
$ ninja clang-bolt
$ ninja stage2-clang-bolt
...
[2756/2759] Instrumenting clang binary with BOLT
[2756/2759] Generating BOLT profile for Clang
[2758/2759] Merging BOLT fdata
[2759/2759] Optimizing Clang with BOLT
...
BOLT-INFO: 7092 out of 184104 functions in the binary (3.9%) have non-empty execution profile
           756531927 : executed instructions (-0.5%)
            15399400 : taken branches (-40.3%)
...
```

Reviewed By: beanz

Differential Revision: https://reviews.llvm.org/D143553

Added: 
    clang/utils/perf-training/bolt.lit.cfg
    clang/utils/perf-training/bolt.lit.site.cfg.in

Modified: 
    clang/CMakeLists.txt
    clang/cmake/caches/BOLT.cmake
    clang/utils/perf-training/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index ef69a68e460a0..bd2ac69c1455e 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -851,9 +851,8 @@ endif()
 
 if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  set(CLANGXX_PATH ${CLANG_PATH}++)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
   # Instrument clang with BOLT
   add_custom_target(clang-instrumented
@@ -863,73 +862,11 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
     DEPENDS clang llvm-bolt
     COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
       -instrument --instrumentation-file-append-pid
-      --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      --instrumentation-file=${BOLT_FDATA}
     COMMENT "Instrumenting clang binary with BOLT"
     VERBATIM
   )
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-    DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-    DEPENDS clang-instrumented
-    COMMAND ${CMAKE_COMMAND} -E create_symlink
-      ${CLANG_INSTRUMENTED}
-      ${CLANGXX_INSTRUMENTED}
-    COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-    VERBATIM
-  )
-
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
-  set(build_configuration "$<CONFIG>")
-  include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
-    DEPENDS clang++-instrumented
-    PREFIX bolt-instrumentation-profile
-    SOURCE_DIR ${CMAKE_SOURCE_DIR}
-    STAMP_DIR ${STAMP_DIR}
-    BINARY_DIR ${BINARY_DIR}
-    EXCLUDE_FROM_ALL 1
-    CMAKE_ARGS
-                ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
-                # We shouldn't need to set this here, but INSTALL_DIR doesn't
-                # seem to work, so instead I'm passing this through
-                -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-                -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
-                -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
-                -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
-                -DCMAKE_ASM_COMPILER_ID=Clang
-                -DCMAKE_BUILD_TYPE=Release
-                -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
-                -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
-    BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
-                                   --config ${build_configuration}
-                                   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
-    INSTALL_COMMAND ""
-    STEP_TARGETS configure build
-    USES_TERMINAL_CONFIGURE 1
-    USES_TERMINAL_BUILD 1
-    USES_TERMINAL_INSTALL 1
-  )
-
-  # Merge profiles into one using merge-fdata
-  add_custom_target(clang-bolt-profile
-    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-  )
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-    DEPENDS merge-fdata bolt-instrumentation-profile-build
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    COMMAND ${Python3_EXECUTABLE}
-      ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
-      $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-      ${CMAKE_CURRENT_BINARY_DIR}
-    COMMENT "Preparing BOLT profile"
-    VERBATIM
-  )
-
   # Optimize original (pre-bolt) Clang using the collected profile
   set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
   add_custom_target(clang-bolt
@@ -939,7 +876,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
     DEPENDS clang-bolt-profile
     COMMAND llvm-bolt ${CLANG_PATH}
       -o ${CLANG_OPTIMIZED}
-      -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      -data ${BOLT_FDATA}
       -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
       -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
     COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>

diff  --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake
index 78ab3b64386a4..7c75d60320f7e 100644
--- a/clang/cmake/caches/BOLT.cmake
+++ b/clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")

diff  --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index 0d551baba2ccf..c6d51863fb1b5 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -61,3 +61,26 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
     COMMENT "Generating order file"
     DEPENDS generate-dtrace-logs)
 endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+    )
+
+  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+    ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+    EXCLUDE_FROM_CHECK_ALL
+    DEPENDS clang-instrumented clear-bolt-fdata
+    )
+
+  add_custom_target(clear-bolt-fdata
+    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+    COMMENT "Clearing old BOLT fdata")
+
+  # Merge profiles into one using merge-fdata
+  add_custom_target(clang-bolt-profile
+    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Merging BOLT fdata"
+    DEPENDS merge-fdata generate-bolt-fdata)
+endif()

diff  --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
new file mode 100644
index 0000000000000..234ac855bd67c
--- /dev/null
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
+config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )

diff  --git a/clang/utils/perf-training/bolt.lit.site.cfg.in b/clang/utils/perf-training/bolt.lit.site.cfg.in
new file mode 100644
index 0000000000000..3029319673fc2
--- /dev/null
+++ b/clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")


        


More information about the cfe-commits mailing list