[clang] 76b2915 - [Clang][CMake] Use perf-training for Clang-BOLT
Amir Ayupov via cfe-commits
cfe-commits at lists.llvm.org
Sat May 13 10:36:35 PDT 2023
Author: Amir Ayupov
Date: 2023-05-13T10:36:29-07:00
New Revision: 76b2915fdbbba18693c9aabda419768f41106f31
URL: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31
DIFF: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31.diff
LOG: [Clang][CMake] Use perf-training for Clang-BOLT
Leverage perf-training flow for BOLT profile collection, enabling reproducible
BOLT optimization. Remove the use of bootstrapped build for profile collection.
Test Plan:
- Regular (single-stage) build
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake
$ ninja clang-bolt
...
[21/24] Instrumenting clang binary with BOLT
[21/24] Generating BOLT profile for Clang
[23/24] Merging BOLT fdata
Profile from 2 files merged.
[24/24] Optimizing Clang with BOLT
...
1291202496 : executed instructions (-1.1%)
27005133 : taken branches (-71.5%)
...
```
- Two stage build (ThinLTO+InstPGO)
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake -C .../clang/cmake/caches/BOLT-PGO.cmake
$ ninja clang-bolt
$ ninja stage2-clang-bolt
...
[2756/2759] Instrumenting clang binary with BOLT
[2756/2759] Generating BOLT profile for Clang
[2758/2759] Merging BOLT fdata
[2759/2759] Optimizing Clang with BOLT
...
BOLT-INFO: 7092 out of 184104 functions in the binary (3.9%) have non-empty execution profile
756531927 : executed instructions (-0.5%)
15399400 : taken branches (-40.3%)
...
```
Reviewed By: beanz
Differential Revision: https://reviews.llvm.org/D143553
Added:
clang/utils/perf-training/bolt.lit.cfg
clang/utils/perf-training/bolt.lit.site.cfg.in
Modified:
clang/CMakeLists.txt
clang/cmake/caches/BOLT.cmake
clang/utils/perf-training/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index ef69a68e460a0..bd2ac69c1455e 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -851,9 +851,8 @@ endif()
if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
- set(CLANGXX_PATH ${CLANG_PATH}++)
set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
- set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+ set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
# Instrument clang with BOLT
add_custom_target(clang-instrumented
@@ -863,73 +862,11 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
DEPENDS clang llvm-bolt
COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-instrument --instrumentation-file-append-pid
- --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ --instrumentation-file=${BOLT_FDATA}
COMMENT "Instrumenting clang binary with BOLT"
VERBATIM
)
- # Make a symlink from clang-bolt.inst to clang++-bolt.inst
- add_custom_target(clang++-instrumented
- DEPENDS ${CLANGXX_INSTRUMENTED}
- )
- add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
- DEPENDS clang-instrumented
- COMMAND ${CMAKE_COMMAND} -E create_symlink
- ${CLANG_INSTRUMENTED}
- ${CLANGXX_INSTRUMENTED}
- COMMENT "Creating symlink from BOLT instrumented clang to clang++"
- VERBATIM
- )
-
- # Build specified targets with instrumented Clang to collect the profile
- set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
- set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
- set(build_configuration "$<CONFIG>")
- include(ExternalProject)
- ExternalProject_Add(bolt-instrumentation-profile
- DEPENDS clang++-instrumented
- PREFIX bolt-instrumentation-profile
- SOURCE_DIR ${CMAKE_SOURCE_DIR}
- STAMP_DIR ${STAMP_DIR}
- BINARY_DIR ${BINARY_DIR}
- EXCLUDE_FROM_ALL 1
- CMAKE_ARGS
- ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
- # We shouldn't need to set this here, but INSTALL_DIR doesn't
- # seem to work, so instead I'm passing this through
- -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
- -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
- -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
- -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
- -DCMAKE_ASM_COMPILER_ID=Clang
- -DCMAKE_BUILD_TYPE=Release
- -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
- -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
- BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
- --config ${build_configuration}
- --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
- INSTALL_COMMAND ""
- STEP_TARGETS configure build
- USES_TERMINAL_CONFIGURE 1
- USES_TERMINAL_BUILD 1
- USES_TERMINAL_INSTALL 1
- )
-
- # Merge profiles into one using merge-fdata
- add_custom_target(clang-bolt-profile
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
- )
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
- DEPENDS merge-fdata bolt-instrumentation-profile-build
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${Python3_EXECUTABLE}
- ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
- $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
- ${CMAKE_CURRENT_BINARY_DIR}
- COMMENT "Preparing BOLT profile"
- VERBATIM
- )
-
# Optimize original (pre-bolt) Clang using the collected profile
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
add_custom_target(clang-bolt
@@ -939,7 +876,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
DEPENDS clang-bolt-profile
COMMAND llvm-bolt ${CLANG_PATH}
-o ${CLANG_OPTIMIZED}
- -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ -data ${BOLT_FDATA}
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake
index 78ab3b64386a4..7c75d60320f7e 100644
--- a/clang/cmake/caches/BOLT.cmake
+++ b/clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index 0d551baba2ccf..c6d51863fb1b5 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -61,3 +61,26 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
COMMENT "Generating order file"
DEPENDS generate-dtrace-logs)
endif()
+
+if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+ configure_lit_site_cfg(
+ ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
+ ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
+ )
+
+ add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
+ ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
+ EXCLUDE_FROM_CHECK_ALL
+ DEPENDS clang-instrumented clear-bolt-fdata
+ )
+
+ add_custom_target(clear-bolt-fdata
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
+ COMMENT "Clearing old BOLT fdata")
+
+ # Merge profiles into one using merge-fdata
+ add_custom_target(clang-bolt-profile
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Merging BOLT fdata"
+ DEPENDS merge-fdata generate-bolt-fdata)
+endif()
diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
new file mode 100644
index 0000000000000..234ac855bd67c
--- /dev/null
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+
+config.name = 'Clang Perf Training'
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
+config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
diff --git a/clang/utils/perf-training/bolt.lit.site.cfg.in b/clang/utils/perf-training/bolt.lit.site.cfg.in
new file mode 100644
index 0000000000000..3029319673fc2
--- /dev/null
+++ b/clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.python_exe = "@Python3_EXECUTABLE@"
+config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
More information about the cfe-commits
mailing list