[clang] 780f69c - [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED (#79942)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 1 15:25:39 PDT 2025
Author: Amir Ayupov
Date: 2025-10-01T15:25:34-07:00
New Revision: 780f69cd922d8925648e11e771e77f0b46190e5b
URL: https://github.com/llvm/llvm-project/commit/780f69cd922d8925648e11e771e77f0b46190e5b
DIFF: https://github.com/llvm/llvm-project/commit/780f69cd922d8925648e11e771e77f0b46190e5b.diff
LOG: [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED (#79942)
Build on Clang-BOLT infrastructure to collect sample profile for CSSPGO.
Add CSSPGO.cmake and BOLT-CSSPGO.cmake to automate CSSPGO/+BOLT
Clang builds.
Note that `CLANG_PGO_TRAINING_DATA_SOURCE_DIR` is required as built-in
training set is inadequate for collecting sampled profile.
Hardware compatibility: CSSPGO requires synchronized (0-skid) call
and branch stacks, which is only available with Intel PEBS (Sandy
Bridge+),
AMD Zen3 with BRS, Zen4 with LBRv2+LBR_PMC_FREEZE, and Zen5 with LBRv2.
This patch adds support for Intel `br_inst_retired.near_taken:uppp`
event.
Test Plan:
Added BOLT-CSSPGO.cmake with same use as BOLT-PGO.cmake,
e.g. for bootstrapped ThinLTO+CSSPGO+BOLT, with CSSPGO profile collected
from LLVM build, and BOLT profile collected from Hello World
(instrumentation):
```
cmake -B clang-csspgo-bolt -S /path/to/llvm-project/llvm \
-DLLVM_ENABLE_LLD=ON -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \
-DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
-DPGO_INSTRUMENT_LTO=Thin \
-DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=/path/to/llvm-project/llvm \
-GNinja -C /path/to/llvm-project/clang/cmake/caches/BOLT-CSSPGO.cmake
ninja stage2-clang-bolt
...
warning: Sample PGO is estimated to optimize better with 19.5x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
...
[2800/2801] Optimizing Clang with BOLT
BOLT-INFO: 8189 out of 106942 functions in the binary (7.7%) have non-empty execution profile
13776393 : taken branches (-42.1%)
```
Performance testing with Clang:
- Setup: Clang-BOLT testing harness
https://github.com/aaupov/llvm-devmtg-2022/commit/9f2b46f67a1930a51c58a0e4894637a8c64c570e
- CSSPGO training: building LLVM,
- InstrPGO training: building Hello World,
- BOLT training: building Hello World, instrumentation,
- benchmark: building small LLVM tool (not),
- 2S Intel SKX Xeon 6138 with 40C/80T and 256GB RAM, using 20C/40T for
build,
- Results, wall time, lower is better
- Baseline (bootstrapped build): 10.36s,
- InstrPGO + ThinLTO: 9.34s,
- CSSPGO + ThinLTO: 8.85s.
- BOLT results, for reference:
- Baseline: 9.09s,
- InstrPGO + ThinLTO: 9.09s,
- CSSPGO + ThinLTO: 8.58s.
---------
Co-authored-by: Matthias Braun <matze at braunis.de>
Added:
clang/cmake/caches/BOLT-CSSPGO.cmake
clang/cmake/caches/CSSPGO.cmake
Modified:
clang/CMakeLists.txt
clang/cmake/caches/BOLT-PGO.cmake
clang/utils/perf-training/CMakeLists.txt
clang/utils/perf-training/perf-helper.py
llvm/CMakeLists.txt
llvm/cmake/modules/HandleLLVMOptions.cmake
Removed:
################################################################################
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 4eaa712899856..e4cb1a359620d 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -754,11 +754,22 @@ if (CLANG_ENABLE_BOOTSTRAP)
if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
add_dependencies(clang-bootstrap-deps llvm-profdata)
set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
+ string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
+ if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ add_dependencies(clang-bootstrap-deps llvm-profgen)
+ list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen)
+ endif()
endif()
if(LLVM_BUILD_INSTRUMENTED)
- add_dependencies(clang-bootstrap-deps generate-profdata)
- set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+ string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
+ if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ add_dependencies(clang-bootstrap-deps generate-sprofdata)
+ set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata)
+ else()
+ add_dependencies(clang-bootstrap-deps generate-profdata)
+ set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+ endif()
# Use the current tools for LTO instead of the instrumented ones
list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
CMAKE_CXX_COMPILER
diff --git a/clang/cmake/caches/BOLT-CSSPGO.cmake b/clang/cmake/caches/BOLT-CSSPGO.cmake
new file mode 100644
index 0000000000000..b1c204ad57ac5
--- /dev/null
+++ b/clang/cmake/caches/BOLT-CSSPGO.cmake
@@ -0,0 +1,3 @@
+set(BOLT_PGO_CMAKE_CACHE "CSSPGO" CACHE STRING "")
+set(BOOTSTRAP_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/BOLT-PGO.cmake)
diff --git a/clang/cmake/caches/BOLT-PGO.cmake b/clang/cmake/caches/BOLT-PGO.cmake
index 1a04ca9a74e5e..cc9410fd0e95c 100644
--- a/clang/cmake/caches/BOLT-PGO.cmake
+++ b/clang/cmake/caches/BOLT-PGO.cmake
@@ -1,3 +1,4 @@
+set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "")
set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
set(CLANG_BOOTSTRAP_TARGETS
@@ -14,4 +15,4 @@ set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
set(PGO_BUILD_CONFIGURATION
${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
CACHE STRING "")
-include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake)
diff --git a/clang/cmake/caches/CSSPGO.cmake b/clang/cmake/caches/CSSPGO.cmake
new file mode 100644
index 0000000000000..59e08a64f8aad
--- /dev/null
+++ b/clang/cmake/caches/CSSPGO.cmake
@@ -0,0 +1,2 @@
+set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index 1d7bb788a15ed..2cd4c4c29c2bb 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -6,6 +6,10 @@ set(CLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH
set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating pgo data")
set(CLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.")
+add_custom_target(clear-perf-data
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
+ COMMENT "Clearing old perf data")
+
option(CLANG_PGO_TRAINING_USE_LLVM_BUILD "Use LLVM build for generating PGO data" ON)
llvm_canonicalize_cmake_booleans(
@@ -21,7 +25,7 @@ if(LLVM_BUILD_INSTRUMENTED)
add_lit_testsuite(generate-profraw "Generating clang PGO data"
${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
EXCLUDE_FROM_CHECK_ALL
- DEPENDS clear-profraw
+ DEPENDS clear-profraw clang
)
add_custom_target(clear-profraw
@@ -55,6 +59,32 @@ if(LLVM_BUILD_INSTRUMENTED)
USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw)
add_dependencies(generate-profdata generate-profraw-external)
endif()
+
+ if(NOT LLVM_PROFGEN)
+ find_program(LLVM_PROFGEN llvm-profgen)
+ endif()
+
+ if(NOT LLVM_PROFGEN)
+ message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen")
+ elseif(NOT CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
+ message(STATUS "CLANG_PGO_TRAINING_DATA_SOURCE_DIR must be set to collect CSSPGO samples")
+ else()
+ set(PERF_HELPER "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py)
+ set(CLANG_SPROFDATA ${CMAKE_CURRENT_BINARY_DIR}/clang.sprofdata)
+ add_custom_command(
+ OUTPUT ${CLANG_SPROFDATA}
+ # Execute generate-profraw-external under perf
+ COMMAND ${PERF_HELPER} perf --csspgo -- ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target generate-profraw-external
+ # Convert perf profile into profraw
+ COMMAND ${PERF_HELPER} perf2prof ${LLVM_PROFGEN} $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR}
+ # Merge profdata
+ COMMAND ${PERF_HELPER} merge --sample ${LLVM_PROFDATA} ${CLANG_SPROFDATA} ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS clang ${CLANG_PGO_TRAINING_DEPS} clear-perf-data generate-profraw-external-clean
+ VERBATIM
+ USES_TERMINAL
+ )
+ add_custom_target(generate-sprofdata DEPENDS ${CLANG_SPROFDATA})
+ endif()
endif()
endif()
@@ -104,8 +134,4 @@ if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
COMMENT "Clearing old BOLT fdata")
- add_custom_target(clear-perf-data
- COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
- COMMENT "Clearing old perf data")
-
endif()
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index ab4491d2a6b6d..1c7904ec62163 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -45,14 +45,22 @@ def clean(args):
def merge(args):
- if len(args) < 3:
- print(
- "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
- + "\tMerges all profraw files from path into output."
- )
- return 1
- cmd = [args[0], "merge", "-o", args[1]]
- for path in args[2:]:
+ parser = argparse.ArgumentParser(
+ prog="perf-helper merge",
+ description="Merges all profraw files from path(s) into output",
+ )
+ parser.add_argument("profdata", help="Path to llvm-profdata tool")
+ parser.add_argument("output", help="Output filename")
+ parser.add_argument(
+ "paths", nargs="+", help="Folder(s) containing input profraw files"
+ )
+ parser.add_argument("--sample", action="store_true", help="Sample profile")
+ opts = parser.parse_args(args)
+
+ cmd = [opts.profdata, "merge", "-o", opts.output]
+ if opts.sample:
+ cmd += ["--sample"]
+ for path in opts.paths:
cmd.extend(findFilesWithExtension(path, "profraw"))
subprocess.check_call(cmd)
return 0
@@ -73,25 +81,30 @@ def merge_fdata(args):
def perf(args):
parser = argparse.ArgumentParser(
- prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+ prog="perf-helper perf",
+ description="perf wrapper for BOLT/CSSPGO profile collection",
)
parser.add_argument(
"--lbr", action="store_true", help="Use perf with branch stacks"
)
+ parser.add_argument("--csspgo", action="store_true", help="Enable CSSPGO flags")
parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
opts = parser.parse_args(args)
cmd = opts.cmd[1:]
+ event = "br_inst_retired.near_taken:uppp" if opts.csspgo else "cycles:u"
perf_args = [
"perf",
"record",
- "--event=cycles:u",
+ f"--event={event}",
"--freq=max",
"--output=%d.perf.data" % os.getpid(),
]
- if opts.lbr:
+ if opts.lbr or opts.csspgo:
perf_args += ["--branch-filter=any,u"]
+ if opts.csspgo:
+ perf_args += ["-g", "--call-graph=fp"]
perf_args.extend(cmd)
start_time = time.time()
@@ -127,6 +140,30 @@ def perf2bolt(args):
return 0
+def perf2prof(args):
+ parser = argparse.ArgumentParser(
+ prog="perf-helper perf2prof",
+ description="perf to CSSPGO prof conversion wrapper",
+ )
+ parser.add_argument("profgen", help="Path to llvm-profgen binary")
+ parser.add_argument("binary", help="Input binary")
+ parser.add_argument("paths", nargs="+", help="Path containing perf.data files")
+ opts = parser.parse_args(args)
+
+ profgen_args = [opts.profgen, f"--binary={opts.binary}"]
+ for path in opts.paths:
+ for filename in findFilesWithExtension(path, "perf.data"):
+ subprocess.run(
+ [
+ *profgen_args,
+ f"--perfdata={filename}",
+ f"--output={filename}.profraw",
+ ],
+ check=True,
+ )
+ return 0
+
+
def dtrace(args):
parser = argparse.ArgumentParser(
prog="perf-helper dtrace",
@@ -660,7 +697,10 @@ def bolt_optimize(args):
process.check_returncode()
if opts.method in ["PERF", "LBR"]:
- perf2bolt([opts.bolt, opts.perf_training_binary_dir, opts.input])
+ args = [opts.bolt, opts.perf_training_binary_dir, opts.input]
+ if opts.method == "LBR":
+ args.extend("--lbr")
+ perf2bolt(args)
merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir])
@@ -707,6 +747,7 @@ def bolt_optimize(args):
"merge-fdata": merge_fdata,
"perf": perf,
"perf2bolt": perf2bolt,
+ "perf2prof": perf2prof,
}
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index b98192968a3ab..c450ee5a3d72e 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -1011,6 +1011,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa
set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
"Profiling data file to use when compiling in order to improve runtime performance.")
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+ "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
if(LLVM_INCLUDE_TESTS)
# All LLVM Python files should be compatible down to this minimum version.
set(LLVM_MINIMUM_PYTHON_VERSION 3.8)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 8eca29f8a03f5..d4195db6368d7 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1184,7 +1184,7 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI)
message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON")
endif()
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1217,6 +1217,19 @@ if (LLVM_BUILD_INSTRUMENTED)
CMAKE_EXE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS)
endif()
+ elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling -fdebug-info-for-profiling"
+ CMAKE_CXX_FLAGS
+ CMAKE_C_FLAGS)
+ if(NOT LINKER_IS_LLD_LINK)
+ append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling -fdebug-info-for-profiling"
+ CMAKE_EXE_LINKER_FLAGS
+ CMAKE_SHARED_LINKER_FLAGS)
+ endif()
+ else()
+ message(FATAL_ERROR "LLVM_BUILD_INSTRUMENTED=CSSPGO can only be specified when compiling with clang")
+ endif()
else()
append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
CMAKE_CXX_FLAGS
@@ -1269,6 +1282,21 @@ elseif(LLVM_PROFDATA_FILE)
message(WARNING "LLVM_PROFDATA_FILE specified, but ${LLVM_PROFDATA_FILE} not found")
endif()
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+ append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+ CMAKE_CXX_FLAGS
+ CMAKE_C_FLAGS)
+ if(NOT LINKER_IS_LLD_LINK)
+ append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+ CMAKE_EXE_LINKER_FLAGS
+ CMAKE_SHARED_LINKER_FLAGS)
+ endif()
+ else()
+ message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+ endif()
+endif()
+
option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
More information about the cfe-commits
mailing list