[llvm] [clang] [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED (PR #79942)
Amir Ayupov via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 29 20:18:40 PST 2024
https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/79942
Build on Clang-BOLT infrastructure to collect sample profiles for CSSPGO.
Add clang/cmake/caches/CSSPGO.cmake to automate CSSPGO Clang build.
Differential Revision: https://reviews.llvm.org/D155419
Test Plan:
Added CSSPGO.cmake, which is used the same way as PGO.cmake, e.g. for a bootstrapped ThinLTO+CSSPGO build:
```
cmake -B csspgo -S /path/to/llvm-project/llvm \
-DLLVM_ENABLE_LLD=ON -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \
-DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
-DPGO_INSTRUMENT_LTO=Thin \
-GNinja -C/path/to/llvm-project/clang/cmake/caches/CSSPGO.cmake
ninja stage2
```
From 90686f2cd5e210f9ca974c10f8c1224a825c1315 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 19 Jul 2023 20:30:29 -0700
Subject: [PATCH] [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED
Build on Clang-BOLT infrastructure to collect sample profiles for CSSPGO.
Add clang/cmake/caches/CSSPGO.cmake to automate CSSPGO Clang build.
Differential Revision: https://reviews.llvm.org/D155419
---
clang/CMakeLists.txt | 12 ++++-
clang/cmake/caches/CSSPGO.cmake | 3 ++
clang/utils/perf-training/CMakeLists.txt | 32 ++++++++++++-
clang/utils/perf-training/lit.cfg | 6 +++
clang/utils/perf-training/lit.site.cfg.in | 1 +
clang/utils/perf-training/perf-helper.py | 54 ++++++++++++++++++----
llvm/CMakeLists.txt | 3 ++
llvm/cmake/modules/HandleLLVMOptions.cmake | 26 ++++++++++-
8 files changed, 123 insertions(+), 14 deletions(-)
create mode 100644 clang/cmake/caches/CSSPGO.cmake
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 47fc2e4886cfc..5d16442ac7bc3 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -741,11 +741,21 @@ if (CLANG_ENABLE_BOOTSTRAP)
if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
add_dependencies(clang-bootstrap-deps llvm-profdata)
set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
+ string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
+ if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ add_dependencies(clang-bootstrap-deps llvm-profgen)
+ list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen)
+ endif()
endif()
if(LLVM_BUILD_INSTRUMENTED)
+ string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
add_dependencies(clang-bootstrap-deps generate-profdata)
- set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+ if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+ else()
+ set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+ endif()
# Use the current tools for LTO instead of the instrumented ones
list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
CMAKE_CXX_COMPILER
diff --git a/clang/cmake/caches/CSSPGO.cmake b/clang/cmake/caches/CSSPGO.cmake
new file mode 100644
index 0000000000000..34159068d5ea3
--- /dev/null
+++ b/clang/cmake/caches/CSSPGO.cmake
@@ -0,0 +1,3 @@
+set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
+set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt
index 601f40902fa34..489a097cd49c8 100644
--- a/clang/utils/perf-training/CMakeLists.txt
+++ b/clang/utils/perf-training/CMakeLists.txt
@@ -14,8 +14,33 @@ if(LLVM_BUILD_INSTRUMENTED)
DEPENDS clang clear-profraw ${CLANG_PERF_TRAINING_DEPS}
)
+ add_custom_target(generate-profdata-deps)
+ string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
+ if (uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ set(PROFDATA_SAMPLE "--sample")
+ if(NOT LLVM_PROFGEN)
+ find_program(LLVM_PROFGEN llvm-profgen)
+ endif()
+
+ if(NOT LLVM_PROFGEN)
+ message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen")
+ endif()
+
+ # Convert perf profiles into profraw
+ add_custom_target(convert-perf-profraw
+ COMMAND "${Python3_EXECUTABLE}"
+ ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2prof ${LLVM_PROFGEN}
+ $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Converting perf profiles into profraw"
+ DEPENDS generate-profraw)
+ add_dependencies(generate-profdata-deps convert-perf-profraw)
+ else()
+ add_dependencies(generate-profdata-deps generate-profraw)
+ endif()
+
add_custom_target(clear-profraw
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
COMMENT "Clearing old profraw data")
if(NOT LLVM_PROFDATA)
@@ -26,9 +51,12 @@ if(LLVM_BUILD_INSTRUMENTED)
message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata")
else()
add_custom_target(generate-profdata
- COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND "${Python3_EXECUTABLE}"
+ ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA}
+ ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR}
+ ${PROFDATA_SAMPLE}
COMMENT "Merging profdata"
- DEPENDS generate-profraw)
+ DEPENDS generate-profdata-deps)
endif()
endif()
diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg
index 0bd06c0d44f65..c6b54f461f1e2 100644
--- a/clang/utils/perf-training/lit.cfg
+++ b/clang/utils/perf-training/lit.cfg
@@ -28,6 +28,12 @@ config.name = 'Clang Perf Training'
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helper_dir)
+if config.llvm_build_instrumented.upper() == "CSSPGO":
+ perf_wrapper = "%s %s/perf-helper.py perf --lbr --call-graph --event=br_inst_retired.near_taken:uppp -- " % (
+ config.python_exe,
+ config.perf_helper_dir,
+ )
+ cc1_wrapper = perf_wrapper
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
diff --git a/clang/utils/perf-training/lit.site.cfg.in b/clang/utils/perf-training/lit.site.cfg.in
index fae93065a4edf..0ae05c10b6d88 100644
--- a/clang/utils/perf-training/lit.site.cfg.in
+++ b/clang/utils/perf-training/lit.site.cfg.in
@@ -8,6 +8,7 @@ config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
config.python_exe = "@Python3_EXECUTABLE@"
+config.llvm_build_instrumented = "@LLVM_BUILD_INSTRUMENTED@"
# Let the main config do the real work.
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/lit.cfg")
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 959bdba5c98cc..96fe571966b9e 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -42,14 +42,20 @@ def clean(args):
def merge(args):
- if len(args) != 3:
- print(
- "Usage: %s merge <llvm-profdata> <output> <path>\n" % __file__
- + "\tMerges all profraw files from path into output."
- )
- return 1
- cmd = [args[0], "merge", "-o", args[1]]
- cmd.extend(findFilesWithExtension(args[2], "profraw"))
+ parser = argparse.ArgumentParser(
+ prog="perf-helper merge",
+ description="Merges all profraw files from path into output"
+ )
+ parser.add_argument("profdata", help="Path to llvm-profdata tool")
+ parser.add_argument("output", help="Output filename")
+ parser.add_argument("path", help="Folder containing input profraw files")
+ parser.add_argument("--sample", action="store_true", help="Sample profile")
+ opts = parser.parse_args(args)
+
+ cmd = [opts.profdata, "merge", "-o", opts.output]
+ if opts.sample:
+ cmd += ["--sample"]
+ cmd.extend(findFilesWithExtension(opts.path, "profraw"))
subprocess.check_call(cmd)
return 0
@@ -69,11 +75,19 @@ def merge_fdata(args):
def perf(args):
parser = argparse.ArgumentParser(
- prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+ prog="perf-helper perf",
+ description="perf wrapper for BOLT/CSSPGO profile collection"
)
parser.add_argument(
"--lbr", action="store_true", help="Use perf with branch stacks"
)
+ parser.add_argument(
+ "--call-graph", action="store_true", help="Collect call graph"
+ )
+ parser.add_argument(
+ "--event", help="PMU event name, defaults to cycles:u",
+ default="cycles:u"
+ )
parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
opts = parser.parse_args(args)
@@ -82,12 +96,14 @@ def perf(args):
perf_args = [
"perf",
"record",
- "--event=cycles:u",
+ f"--event={opts.event}",
"--freq=max",
"--output=%d.perf.data" % os.getpid(),
]
if opts.lbr:
perf_args += ["--branch-filter=any,u"]
+ if opts.call_graph:
+ perf_args += ["-g", "--call-graph=fp"]
perf_args.extend(cmd)
start_time = time.time()
@@ -123,6 +139,23 @@ def perf2bolt(args):
return 0
+def perf2prof(args):
+ parser = argparse.ArgumentParser(
+ prog="perf-helper perf2prof",
+ description="perf to CSSPGO prof conversion wrapper",
+ )
+ parser.add_argument("profgen", help="Path to llvm-profgen binary")
+ parser.add_argument("binary", help="Input binary")
+ parser.add_argument("path", help="Path containing perf.data files")
+ opts = parser.parse_args(args)
+
+ profgen_args = [opts.profgen, f"--binary={opts.binary}"]
+ for filename in findFilesWithExtension(opts.path, "perf.data"):
+ subprocess.check_call(profgen_args + [f"--perfdata={filename}",
+ f"--output={filename}.profraw"])
+ return 0
+
+
def dtrace(args):
parser = argparse.ArgumentParser(
prog="perf-helper dtrace",
@@ -565,6 +598,7 @@ def genOrderFile(args):
"merge-fdata": merge_fdata,
"perf": perf,
"perf2bolt": perf2bolt,
+ "perf2prof": perf2prof,
}
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 485c76b8bb936..4421ce7e59b92 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -879,6 +879,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa
set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
"Profiling data file to use when compiling in order to improve runtime performance.")
+set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
+ "Sampling profiling data file to use when compiling in order to improve runtime performance.")
+
if(LLVM_INCLUDE_TESTS)
# Lit test suite requires at least python 3.6
set(LLVM_MINIMUM_PYTHON_VERSION 3.6)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 0699a8586fcc7..3754dff598451 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1116,7 +1116,7 @@ endif()
option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off)
mark_as_advanced(LLVM_ENABLE_IR_PGO)
-set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
+set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1149,6 +1149,15 @@ if (LLVM_BUILD_INSTRUMENTED)
CMAKE_EXE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS)
endif()
+ elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
+ append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+ CMAKE_CXX_FLAGS
+ CMAKE_C_FLAGS)
+ if(NOT LINKER_IS_LLD_LINK)
+ append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
+ CMAKE_EXE_LINKER_FLAGS
+ CMAKE_SHARED_LINKER_FLAGS)
+ endif()
else()
append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
CMAKE_CXX_FLAGS
@@ -1199,6 +1208,21 @@ if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE})
endif()
endif()
+if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
+ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+ append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+ CMAKE_CXX_FLAGS
+ CMAKE_C_FLAGS)
+ if(NOT LINKER_IS_LLD_LINK)
+ append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
+ CMAKE_EXE_LINKER_FLAGS
+ CMAKE_SHARED_LINKER_FLAGS)
+ endif()
+ else()
+ message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
+ endif()
+endif()
+
option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
More information about the cfe-commits mailing list