[clang] [clang][perf-training] Fix profiling with -DCLANG_BOLT=perf (PR #119117)
Tom Stellard via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 9 17:53:57 PST 2024
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/119117
>From 5d13b69039fab7c5960288cead18dc76f5d01f4f Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Thu, 5 Dec 2024 15:01:27 +0000
Subject: [PATCH 1/3] [clang][perf-training] Fix profiling with
-DCLANG_BOLT=perf
This fixes the llvm-support build that generates the profile data.
However, I'm wondering if maybe we should disable llvm-support and
only run hello-world with -DCLANG_BOLT=perf. The bolt optimizations
with perf only give about a 3% performance increase (although maybe
with hw counters this would be better) and it takes a very long
time to convert all the perf profiles to the fdata format.
---
clang/utils/perf-training/bolt.lit.cfg | 22 +++++++++++++++----
.../perf-training/llvm-support/build.test | 4 ++--
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
index 1d0cf9a8a17a8e..04b18975275495 100644
--- a/clang/utils/perf-training/bolt.lit.cfg
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -8,21 +8,32 @@ import subprocess
clang_bolt_mode = config.clang_bolt_mode.lower()
clang_binary = "clang"
-perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf "
+perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf"
if clang_bolt_mode == "instrument":
perf_wrapper = ""
clang_binary = config.clang_bolt_name
elif clang_bolt_mode == "lbr":
- perf_wrapper += " --lbr -- "
+ perf_wrapper += " --lbr --"
elif clang_bolt_mode == "perf":
- perf_wrapper += " -- "
+ perf_wrapper += " --"
else:
assert 0, "Unsupported CLANG_BOLT_MODE variable"
-config.clang = perf_wrapper + os.path.realpath(
+clang_nowrapper = os.path.realpath(
lit.util.which(clang_binary, config.clang_tools_dir)
).replace("\\", "/")
+config.clang = f'{perf_wrapper} {clang_nowrapper}'
+
+# We need to limit the number of build jobs with perf in order to avoid this
+# error:
+#
+# | Permission error mapping pages.
+# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb,
+# | or try again with a smaller value of -m/--mmap_pages.
+ninja_args = ""
+if clang_bolt_mode != "instrument":
+ ninja_args = "-j1"
config.name = "Clang Perf Training"
config.suffixes = [
@@ -52,3 +63,6 @@ config.substitutions.append(("%test_root", config.test_exec_root))
config.substitutions.append(('%cmake_generator', config.cmake_generator))
config.substitutions.append(('%cmake', config.cmake_exe))
config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir))
+config.substitutions.append(('%perf_cmake_compiler_launcher', perf_wrapper.replace(' ', ';')))
+config.substitutions.append(('%nowrapper_clang', clang_nowrapper))
+config.substitutions.append(('%ninja_args', ninja_args))
diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test
index f29a594c846869..1f4d76502a3757 100644
--- a/clang/utils/perf-training/llvm-support/build.test
+++ b/clang/utils/perf-training/llvm-support/build.test
@@ -1,2 +1,2 @@
-RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
-RUN: %cmake --build %t -v --target LLVMSupport
+RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_CXX_COMPILER_LAUNCHER="%perf_cmake_compiler_launcher" -DCMAKE_C_COMPILER="%nowrapper_clang" -DCMAKE_CXX_COMPILER="%nowrapper_clang" -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
+RUN: %cmake --build %t %ninja_args -v --target LLVMSupport
>From 9aa48ac20e931d8192cecfec6ef789ea936fa6ff Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Mon, 9 Dec 2024 19:23:40 +0000
Subject: [PATCH 2/3] Revert "[clang][perf-training] Fix profiling with
-DCLANG_BOLT=perf"
This reverts commit 5d13b69039fab7c5960288cead18dc76f5d01f4f.
---
clang/utils/perf-training/bolt.lit.cfg | 22 ++++---------------
.../perf-training/llvm-support/build.test | 4 ++--
2 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
index 04b18975275495..1d0cf9a8a17a8e 100644
--- a/clang/utils/perf-training/bolt.lit.cfg
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -8,32 +8,21 @@ import subprocess
clang_bolt_mode = config.clang_bolt_mode.lower()
clang_binary = "clang"
-perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf"
+perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf "
if clang_bolt_mode == "instrument":
perf_wrapper = ""
clang_binary = config.clang_bolt_name
elif clang_bolt_mode == "lbr":
- perf_wrapper += " --lbr --"
+ perf_wrapper += " --lbr -- "
elif clang_bolt_mode == "perf":
- perf_wrapper += " --"
+ perf_wrapper += " -- "
else:
assert 0, "Unsupported CLANG_BOLT_MODE variable"
-clang_nowrapper = os.path.realpath(
+config.clang = perf_wrapper + os.path.realpath(
lit.util.which(clang_binary, config.clang_tools_dir)
).replace("\\", "/")
-config.clang = f'{perf_wrapper} {clang_nowrapper}'
-
-# We need to limit the number of build jobs with perf in order to avoid this
-# error:
-#
-# | Permission error mapping pages.
-# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb,
-# | or try again with a smaller value of -m/--mmap_pages.
-ninja_args = ""
-if clang_bolt_mode != "instrument":
- ninja_args = "-j1"
config.name = "Clang Perf Training"
config.suffixes = [
@@ -63,6 +52,3 @@ config.substitutions.append(("%test_root", config.test_exec_root))
config.substitutions.append(('%cmake_generator', config.cmake_generator))
config.substitutions.append(('%cmake', config.cmake_exe))
config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir))
-config.substitutions.append(('%perf_cmake_compiler_launcher', perf_wrapper.replace(' ', ';')))
-config.substitutions.append(('%nowrapper_clang', clang_nowrapper))
-config.substitutions.append(('%ninja_args', ninja_args))
diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test
index 1f4d76502a3757..f29a594c846869 100644
--- a/clang/utils/perf-training/llvm-support/build.test
+++ b/clang/utils/perf-training/llvm-support/build.test
@@ -1,2 +1,2 @@
-RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_CXX_COMPILER_LAUNCHER="%perf_cmake_compiler_launcher" -DCMAKE_C_COMPILER="%nowrapper_clang" -DCMAKE_CXX_COMPILER="%nowrapper_clang" -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
-RUN: %cmake --build %t %ninja_args -v --target LLVMSupport
+RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
+RUN: %cmake --build %t -v --target LLVMSupport
>From d4480da2205be8e68a9139885ed2c4a008514920 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Tue, 10 Dec 2024 01:44:46 +0000
Subject: [PATCH 3/3] Fixes for PGO
---
clang/utils/perf-training/bolt.lit.cfg | 20 ++++++++++++++++++-
clang/utils/perf-training/lit.cfg | 5 +++++
.../perf-training/llvm-support/build.test | 4 ++--
3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg
index 1d0cf9a8a17a8e..3188f6357dc3c5 100644
--- a/clang/utils/perf-training/bolt.lit.cfg
+++ b/clang/utils/perf-training/bolt.lit.cfg
@@ -4,6 +4,7 @@ from lit import Test
import lit.formats
import lit.util
import os
+import re
import subprocess
clang_bolt_mode = config.clang_bolt_mode.lower()
@@ -20,9 +21,24 @@ elif clang_bolt_mode == "perf":
else:
assert 0, "Unsupported CLANG_BOLT_MODE variable"
-config.clang = perf_wrapper + os.path.realpath(
+clang_nowrapper = os.path.realpath(
lit.util.which(clang_binary, config.clang_tools_dir)
).replace("\\", "/")
+config.clang = perf_wrapper + clang_nowrapper
+config.cmake_compiler_args = "-DCMAKE_CXX_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER='{1}' -DCMAKE_CXX_COMPILER='{1};--driver-mode=g++'".format(
+ re.sub(r"\s+", ";", perf_wrapper.rstrip()),
+ re.sub(r"\s+", ";", clang_nowrapper)
+)
+
+# We need to limit the number of build jobs with perf in order to avoid this
+# error:
+#
+# | Permission error mapping pages.
+# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb,
+# | or try again with a smaller value of -m/--mmap_pages.
+ninja_args = ""
+if clang_bolt_mode != "instrument":
+ ninja_args = "-j1"
config.name = "Clang Perf Training"
config.suffixes = [
@@ -49,6 +65,8 @@ config.substitutions.append(("%clang_cpp", f" {config.clang} --driver-mode=g++ "
config.substitutions.append(("%clang_skip_driver", config.clang))
config.substitutions.append(("%clang", config.clang))
config.substitutions.append(("%test_root", config.test_exec_root))
+config.substitutions.append(("%cmake_compiler_args", config.cmake_compiler_args))
config.substitutions.append(('%cmake_generator', config.cmake_generator))
config.substitutions.append(('%cmake', config.cmake_exe))
config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir))
+config.substitutions.append(('%ninja_args', ninja_args))
diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg
index 654961e215da68..e788a7a01ef976 100644
--- a/clang/utils/perf-training/lit.cfg
+++ b/clang/utils/perf-training/lit.cfg
@@ -31,14 +31,19 @@ cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helpe
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.cmake_compiler_args = '-DCMAKE_C_COMPILER="{0}" -DCMAKE_CXX_COMPILER="{0};--driver-mode=g++"'.format(
+ config.clang.replace(' ', ';')
+)
config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags)))
config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
config.substitutions.append( ('%clang_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags)))
config.substitutions.append( ('%clang', '%s %s ' % (config.clang, sysroot_flags) ) )
config.substitutions.append( ('%test_root', config.test_exec_root ) )
+config.substitutions.append( ('%cmake_compiler_args', config.cmake_compiler_args))
config.substitutions.append( ('%cmake_generator', config.cmake_generator ) )
config.substitutions.append( ('%cmake', config.cmake_exe ) )
config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) )
+config.substitutions.append( ('%ninja_args', '' ) )
config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw'
diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test
index f29a594c846869..402a5b9e27ff88 100644
--- a/clang/utils/perf-training/llvm-support/build.test
+++ b/clang/utils/perf-training/llvm-support/build.test
@@ -1,2 +1,2 @@
-RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
-RUN: %cmake --build %t -v --target LLVMSupport
+RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir %cmake_compiler_args -DCMAKE_BUILD_TYPE=Release
+RUN: %cmake --build %t %ninja_args -v --target LLVMSupport
More information about the cfe-commits
mailing list