[clang] Add support for dynamic libraries in CLANG_BOLT (PR #127020)

via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 12 23:58:08 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (serge-sans-paille)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/127020.diff


2 Files Affected:

- (modified) clang/tools/driver/CMakeLists.txt (+36-10) 
- (modified) clang/utils/perf-training/perf-helper.py (+92-55) 


``````````diff
diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt
index ad336fcc45b60..10ea5de387220 100644
--- a/clang/tools/driver/CMakeLists.txt
+++ b/clang/tools/driver/CMakeLists.txt
@@ -23,10 +23,14 @@ if(CLANG_PLUGIN_SUPPORT)
   set(support_plugins SUPPORT_PLUGINS)
 endif()
 
+set(CLANG_BOLT_ALLOWLIST INSTRUMENT PERF LBR)
 set(CLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to Clang. \
-  May be specified as Instrument or Perf or LBR to use a particular profiling \
+May be specified as one of ${CLANG_BOLT_ALLOWLIST} to use a particular profiling \
   mechanism.")
 string(TOUPPER "${CLANG_BOLT}" CLANG_BOLT)
+if (CLANG_BOLT AND NOT CLANG_BOLT IN_LIST CLANG_BOLT_ALLOWLIST)
+    message(FATAL_ERROR "Specified CLANG_BOLT value '${CLANG_BOLT}' is not one of ${CLANG_BOLT_ALLOWLIST}.")
+endif()
 
 if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_BOLT_DEPS clear-bolt-fdata llvm-bolt llvm-readobj)
@@ -164,6 +168,28 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   )
   set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}")
 
+  set(CLANG_BOLT_INPUTS $<TARGET_FILE:clang>)
+  set(CLANG_INSTRUMENTED_OUTPUTS ${CLANG_INSTRUMENTED})
+
+  # Add in dynamically linked libraries, if need be. Currently only supported
+  # on Linux because it relies on LD_PRELOAD for instrumentation.
+  if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    if (CLANG_LINK_CLANG_DYLIB)
+     set(CLANG_CPP_BOLT_INSTRUMENTED "clang-cxx-bolt.inst" CACHE STRING
+       "Name of BOLT-instrumented Clang library")
+     set(CLANG_CPP_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_CPP_BOLT_INSTRUMENTED})
+     list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:clang-cpp>)
+     list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${CLANG_CPP_INSTRUMENTED})
+    endif()
+    if (LLVM_LINK_LLVM_DYLIB)
+      set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING
+        "Name of BOLT-instrumented LLVM library")
+      set(LLVM_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED})
+      list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:LLVM>)
+      list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${LLVM_INSTRUMENTED})
+    endif()
+  endif()
+
   # This POST_BUILD command is executed unconditionally even if the clang target
   # is already built.  We need to wrap the whole bolt optimization process in
   # a single python wrapper, so that we can first check if the binary has
@@ -172,15 +198,15 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
     TARGET clang POST_BUILD
     COMMAND  "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/perf-training/perf-helper.py
              bolt-optimize
-	     --method ${CLANG_BOLT}
-	     --input $<TARGET_FILE:clang>
-	     --instrumented-output ${CLANG_INSTRUMENTED}
-	     --fdata ${BOLT_FDATA}
-	     --perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR}
-	     --readelf $<TARGET_FILE:llvm-readobj>
-	     --bolt $<TARGET_FILE:llvm-bolt>
-	     --lit "${LIT_COMMAND}"
-	     --merge-fdata $<TARGET_FILE:merge-fdata>
+             --method ${CLANG_BOLT}
+             --input "${CLANG_BOLT_INPUTS}"
+             --instrumented-output "${CLANG_INSTRUMENTED_OUTPUTS}"
+             --fdata ${BOLT_FDATA}
+             --perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR}
+             --readelf $<TARGET_FILE:llvm-readobj>
+             --bolt $<TARGET_FILE:llvm-bolt>
+             --lit "${LIT_COMMAND}"
+             --merge-fdata $<TARGET_FILE:merge-fdata>
     COMMENT "Optimizing Clang with BOLT"
     USES_TERMINAL
     VERBATIM
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 55c5160a71c4f..ea32ef216bcaa 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -559,6 +559,22 @@ def genOrderFile(args):
 
     return 0
 
+def filter_bolt_optimized(inputs, instrumented_outputs, readelf):
+    new_inputs = []
+    new_instrumented_outputs = []
+    for input, instrumented_output in zip(inputs, instrumented_outputs):
+        output = subprocess.check_output(
+            [readelf, "-WS", input], universal_newlines=True
+        )
+
+        # This binary has already been bolt-optimized, so skip further processing.
+        if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE):
+            print(f"Skipping {input}, it's already instrumented")
+        else:
+            new_inputs.append(input)
+            new_instrumented_outputs.append(instrumented_output)
+    return new_inputs, new_instrumented_outputs
+
 
 def bolt_optimize(args):
     parser = argparse.ArgumentParser("%prog  [options] ")
@@ -574,47 +590,66 @@ def bolt_optimize(args):
 
     opts = parser.parse_args(args)
 
-    output = subprocess.check_output(
-        [opts.readelf, "-WS", opts.input], universal_newlines=True
-    )
+    inputs = opts.input.split(';')
+    instrumented_outputs = opts.instrumented_output.split(';')
+    assert len(inputs) == len(instrumented_outputs), "inconsistent --input / --instrumented-output arguments"
 
-    # This binary has already been bolt-optimized, so skip further processing.
-    if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE):
+    inputs, instrumented_outputs = filter_bolt_optimized(inputs, instrumented_outputs, opts.readelf)
+    if not inputs:
         return 0
 
+    environ = os.environ.copy()
     if opts.method == "INSTRUMENT":
-        process = subprocess.run(
-            [
-                opts.bolt,
-                opts.input,
-                "-o",
-                opts.instrumented_output,
-                "-instrument",
-                "--instrumentation-file-append-pid",
-                f"--instrumentation-file={opts.fdata}",
-            ],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-        )
+        preloads = []
+        for input, instrumented_output in zip(inputs, instrumented_outputs):
+            args = [
+                    opts.bolt,
+                    input,
+                    "-o",
+                    instrumented_output,
+                    "-instrument",
+                    "--instrumentation-file-append-pid",
+                    f"--instrumentation-file={opts.fdata}",
+                ]
+            print("Running: " + " ".join(args))
+            process = subprocess.run(
+                args,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+            )
 
-        print(process.args)
-        for line in process.stdout:
-            sys.stdout.write(line)
-        process.check_returncode()
+            for line in process.stdout:
+                sys.stdout.write(line)
+            process.check_returncode()
 
-    process = subprocess.run(
-        [
+            output = subprocess.check_output(
+                [opts.readelf, "--file-header", input], universal_newlines=True
+            )
+            if re.search(r"Type:\s*((Shared)|(DYN))", output):
+                # force using the instrumented version
+                preloads.append(instrumented_output)
+
+        if preloads:
+            print("Patching execution environment for dynamic library")
+            environ["LD_PRELOAD"] = os.pathsep.join(preloads)
+
+
+    args = [
             sys.executable,
             opts.lit,
-            os.path.join(opts.perf_training_binary_dir, "bolt-fdata"),
-        ],
+            "-v",
+            os.path.join(opts.perf_training_binary_dir, "bolt-fdata"),
+        ]
+    print("Running: " + " ".join(args))
+    process = subprocess.run(
+        args,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
+        env=environ,
     )
 
-    print(process.args)
     for line in process.stdout:
         sys.stdout.write(line)
     process.check_returncode()
@@ -624,35 +659,37 @@ def bolt_optimize(args):
 
     merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir])
 
-    shutil.copy(opts.input, f"{opts.input}-prebolt")
+    for input in inputs:
+        shutil.copy(input, f"{input}-prebolt")
 
-    process = subprocess.run(
-        [
-            opts.bolt,
-            f"{opts.input}-prebolt",
-            "-o",
-            opts.input,
-            "-data",
-            opts.fdata,
-            "-reorder-blocks=ext-tsp",
-            "-reorder-functions=cdsort",
-            "-split-functions",
-            "-split-all-cold",
-            "-split-eh",
-            "-dyno-stats",
-            "-use-gnu-stack",
-            "-update-debug-sections",
-            "-nl" if opts.method == "PERF" else "",
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-    )
+        args = [
+                opts.bolt,
+                f"{input}-prebolt",
+                "-o",
+                input,
+                "-data",
+                opts.fdata,
+                "-reorder-blocks=ext-tsp",
+                "-reorder-functions=cdsort",
+                "-split-functions",
+                "-split-all-cold",
+                "-split-eh",
+                "-dyno-stats",
+                "-use-gnu-stack",
+                "-update-debug-sections",
+                "-nl" if opts.method == "PERF" else "",
+            ]
+        print("Running: " + " ".join(args))
+        process = subprocess.run(
+            args,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
 
-    print(process.args)
-    for line in process.stdout:
-        sys.stdout.write(line)
-    process.check_returncode()
+        for line in process.stdout:
+            sys.stdout.write(line)
+        process.check_returncode()
 
 
 commands = {

``````````

</details>


https://github.com/llvm/llvm-project/pull/127020


More information about the cfe-commits mailing list