[clang] 3dab7fe - [CMake] Add clang-bolt target
Amir Ayupov via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 23 01:10:41 PDT 2022
Author: Amir Ayupov
Date: 2022-09-23T10:10:31+02:00
New Revision: 3dab7fede2019c399d793c43ca9ea5a4f2d5031f
URL: https://github.com/llvm/llvm-project/commit/3dab7fede2019c399d793c43ca9ea5a4f2d5031f
DIFF: https://github.com/llvm/llvm-project/commit/3dab7fede2019c399d793c43ca9ea5a4f2d5031f.diff
LOG: [CMake] Add clang-bolt target
This patch adds a `CLANG_BOLT_INSTRUMENT` option that applies BOLT instrumentation
to Clang, performs a bootstrap build with the resulting Clang, merges the resulting
fdata files into a single profile file, and uses it to perform BOLT optimization
on the original Clang binary.
The projects and targets used for bootstrap/profile collection are configurable via
`CLANG_BOLT_INSTRUMENT_PROJECTS` and `CLANG_BOLT_INSTRUMENT_TARGETS`.
The defaults are "llvm" and "count" respectively, which results in a profile with
~5.3B dynamically executed instructions.
The intended use of the functionality is through the BOLT CMake cache file, similar
to the PGO 2-stage build:
```
cmake <llvm-project>/llvm -C <llvm-project>/clang/cmake/caches/BOLT.cmake
ninja clang++-bolt # pulls clang-bolt
```
Stats with a recent checkout (clang-16), pre-built BOLT and Clang, 72vCPU/224G
| CMake configure with host Clang + BOLT.cmake | 1m6.592s
| Instrumenting Clang with BOLT | 2m50.508s
| CMake configure `llvm` with instrumented Clang | 5m46.364s (~5x slowdown)
| CMake build `not` with instrumented Clang | 0m6.456s
| Merging fdata files | 0m9.439s
| Optimizing Clang with BOLT | 0m39.201s
Building Clang:
```
cmake ../llvm-project/llvm -DCMAKE_C_COMPILER=... -DCMAKE_CXX_COMPILER=... \
  -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=clang \
  -DLLVM_TARGETS_TO_BUILD=Native -GNinja
```
| | Release | BOLT-optimized
| cmake | 0m24.016s | 0m22.333s
| ninja clang | 5m55.692s | 4m35.122s
I know it's not rigorous, but it shows a ballpark figure.
Reviewed By: phosek
Differential Revision: https://reviews.llvm.org/D132975
Added:
clang/cmake/caches/BOLT.cmake
Modified:
clang/CMakeLists.txt
clang/utils/perf-training/perf-helper.py
Removed:
################################################################################
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 02ce5de4652de..22b5118c83eda 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -443,7 +443,7 @@ CMAKE_DEPENDENT_OPTION(CLANG_PLUGIN_SUPPORT
"HAVE_CLANG_PLUGIN_SUPPORT" OFF)
# If libstdc++ is statically linked, clang-repl needs to statically link libstdc++
-# itself, which is not possible in many platforms because of current limitations in
+# itself, which is not possible in many platforms because of current limitations in
# JIT stack. (more platforms need to be supported by JITLink)
if(NOT LLVM_STATIC_LINK_CXX_STDLIB)
set(HAVE_CLANG_REPL_SUPPORT ON)
@@ -881,6 +881,118 @@ if (CLANG_ENABLE_BOOTSTRAP)
endforeach()
endif()
+if (CLANG_BOLT_INSTRUMENT)
+ set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+ set(CLANGXX_PATH ${CLANG_PATH}++)
+ set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+ set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
+ set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
+ set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
+
+ # Instrument clang with BOLT
+ add_custom_target(clang-instrumented
+ DEPENDS ${CLANG_INSTRUMENTED}
+ )
+ add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+ DEPENDS clang llvm-bolt
+ COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+ -instrument --instrumentation-file-append-pid
+ --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ COMMENT "Instrumenting clang binary with BOLT"
+ VERBATIM
+ )
+
+ # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+ add_custom_target(clang++-instrumented
+ DEPENDS ${CLANGXX_INSTRUMENTED}
+ )
+ add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+ DEPENDS clang-instrumented
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${CLANG_INSTRUMENTED}
+ ${CLANGXX_INSTRUMENTED}
+ COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+ VERBATIM
+ )
+
+ # Build specified targets with instrumented Clang to collect the profile
+ set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
+ set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+ set(build_configuration "$<CONFIG>")
+ include(ExternalProject)
+ ExternalProject_Add(bolt-instrumentation-profile
+ DEPENDS clang++-instrumented
+ PREFIX bolt-instrumentation-profile
+ SOURCE_DIR ${CMAKE_SOURCE_DIR}
+ STAMP_DIR ${STAMP_DIR}
+ BINARY_DIR ${BINARY_DIR}
+ EXCLUDE_FROM_ALL 1
+ CMAKE_ARGS
+ ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
+ # We shouldn't need to set this here, but INSTALL_DIR doesn't
+ # seem to work, so instead I'm passing this through
+ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+ -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
+ -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
+ -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+ -DCMAKE_ASM_COMPILER_ID=Clang
+ -DCMAKE_BUILD_TYPE=Release
+ -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+ -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
+ BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+ --config ${build_configuration}
+ --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+ INSTALL_COMMAND ""
+ STEP_TARGETS configure build
+ USES_TERMINAL_CONFIGURE 1
+ USES_TERMINAL_BUILD 1
+ USES_TERMINAL_INSTALL 1
+ )
+
+ # Merge profiles into one using merge-fdata
+ add_custom_target(clang-bolt-profile
+ DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ )
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ DEPENDS merge-fdata bolt-instrumentation-profile-build
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${Python3_EXECUTABLE}
+ ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
+ $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Preparing BOLT profile"
+ VERBATIM
+ )
+
+ # Optimize original (pre-bolt) Clang using the collected profile
+ add_custom_target(clang-bolt
+ DEPENDS ${CLANG_OPTIMIZED}
+ )
+ add_custom_command(OUTPUT ${CLANG_OPTIMIZED}
+ DEPENDS clang-bolt-profile
+ COMMAND llvm-bolt ${CLANG_PATH}
+ -o ${CLANG_OPTIMIZED}
+ -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+ -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
+ -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+ COMMENT "Optimizing Clang with BOLT"
+ VERBATIM
+ )
+
+ # Make a symlink from clang-bolt to clang++-bolt
+ add_custom_target(clang++-bolt
+ DEPENDS ${CLANGXX_OPTIMIZED}
+ )
+ add_custom_command(OUTPUT ${CLANGXX_OPTIMIZED}
+ DEPENDS clang-bolt
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${CLANG_OPTIMIZED}
+ ${CLANGXX_OPTIMIZED}
+ COMMENT "Creating symlink from BOLT optimized clang to clang++"
+ VERBATIM
+ )
+endif()
+
if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)
add_subdirectory(utils/ClangVisualizers)
endif()
diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake
new file mode 100644
index 0000000000000..65444c8044c3b
--- /dev/null
+++ b/clang/cmake/caches/BOLT.cmake
@@ -0,0 +1,15 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# Disable function splitting enabled by default in GCC8+
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+endif()
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 88708a92712a9..c6a815e654736 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -38,7 +38,7 @@ def clean(args):
def merge(args):
if len(args) != 3:
- print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ +
+ print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
'\tMerges all profraw files from path into output.')
return 1
cmd = [args[0], 'merge', '-o', args[1]]
@@ -46,6 +46,16 @@ def merge(args):
subprocess.check_call(cmd)
return 0
+def merge_fdata(args):
+ if len(args) != 3:
+ print('Usage: %s merge-fdata <merge-fdata> <output> <path>\n' % __file__ +
+ '\tMerges all fdata files from path into output.')
+ return 1
+ cmd = [args[0], '-o', args[1]]
+ cmd.extend(findFilesWithExtension(args[2], "fdata"))
+ subprocess.check_call(cmd)
+ return 0
+
def dtrace(args):
parser = argparse.ArgumentParser(prog='perf-helper dtrace',
description='dtrace wrapper for order file generation')
@@ -395,10 +405,12 @@ def genOrderFile(args):
return 0
commands = {'clean' : clean,
- 'merge' : merge,
+ 'merge' : merge,
'dtrace' : dtrace,
'cc1' : cc1,
- 'gen-order-file' : genOrderFile}
+ 'gen-order-file' : genOrderFile,
+ 'merge-fdata' : merge_fdata,
+ }
def main():
f = commands[sys.argv[1]]
More information about the cfe-commits
mailing list