[llvm] f649599 - [CMake] Use LLVM own tools in extract_symbols.py
Igor Kudrin via llvm-commits
llvm-commits at lists.llvm.org
Mon May 15 16:21:54 PDT 2023
Author: Igor Kudrin
Date: 2023-05-15T16:20:19-07:00
New Revision: f649599ea93301bd0d0a2b8e450d1f77425ea92e
URL: https://github.com/llvm/llvm-project/commit/f649599ea93301bd0d0a2b8e450d1f77425ea92e
DIFF: https://github.com/llvm/llvm-project/commit/f649599ea93301bd0d0a2b8e450d1f77425ea92e.diff
LOG: [CMake] Use LLVM own tools in extract_symbols.py
As of now, 'extract_symbols.py' can use several tools to extract
symbols from object files and libraries and to guess if the target is
32-bit Windows. The tools are being found via PATH, so in most cases,
they are just system tools. This approach has a number of limitations,
in particular:
* System tools may not be able to handle the target format in case of
cross-platform builds,
* They cannot read symbols from LLVM bitcode files, so the staged LTO
build with plugins is not supported,
* The auto-selected tools may be suboptimal (see D113557),
* Support for multiple tools for a single task increases the complexity
of the script code.
The patch proposes using LLVM's own tools to solve these issues.
Specifically, 'llvm-readobj' detects the target platform, and 'llvm-nm'
reads symbols from all supported formats, including bitcode files. The
tools can be built in Release mode for the host platform or overridden
using CMake settings 'LLVM_READOBJ' and 'LLVM_NM' respectively. The
implementation also supports using precompiled tools via
'LLVM_NATIVE_TOOL_DIR'.
Differential Revision: https://reviews.llvm.org/D149119
Added:
Modified:
llvm/CMakeLists.txt
llvm/cmake/modules/AddLLVM.cmake
llvm/cmake/modules/CrossCompile.cmake
llvm/tools/llvm-nm/CMakeLists.txt
llvm/tools/llvm-readobj/CMakeLists.txt
llvm/tools/llvm-shlib/CMakeLists.txt
llvm/utils/extract_symbols.py
Removed:
################################################################################
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 5e2f08f0d2c8f..dfe81ad0e2ee3 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -1129,11 +1129,6 @@ endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
# use export_executable_symbols(target).
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
-set(LLVM_EXTRACT_SYMBOLS_FLAGS ""
- CACHE STRING "Additional options to pass to llvm/utils/extract_symbols.py.
- These cannot override the options set by cmake, but can add extra options
- such as --tools.")
-
include(AddLLVM)
include(TableGen)
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 5357e543f09d4..91d2c8bac6e03 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1246,10 +1246,18 @@ function(export_executable_symbols target)
else()
set(mangling itanium)
endif()
+ get_host_tool_path(llvm-nm LLVM_NM llvm_nm_exe llvm_nm_target)
+ get_host_tool_path(llvm-readobj LLVM_READOBJ llvm_readobj_exe llvm_readobj_target)
add_custom_command(OUTPUT ${exported_symbol_file}
- COMMAND "${Python3_EXECUTABLE}" ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py ${LLVM_EXTRACT_SYMBOLS_FLAGS} --mangling=${mangling} ${static_libs} -o ${exported_symbol_file}
+ COMMAND "${Python3_EXECUTABLE}"
+ ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py
+ --mangling=${mangling} ${static_libs}
+ -o ${exported_symbol_file}
+ --nm=${llvm_nm_exe}
+ --readobj=${llvm_readobj_exe}
WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
- DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py ${static_libs}
+ DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py
+ ${static_libs} ${llvm_nm_target} ${llvm_readobj_target}
VERBATIM
COMMENT "Generating export list for ${target}")
add_llvm_symbol_exports( ${target} ${exported_symbol_file} )
@@ -2423,8 +2431,8 @@ function(find_first_existing_vc_file path out_var)
endif()
endfunction()
-function(setup_host_tool tool_name setting_name exe_var_name target_var_name)
- set(${setting_name}_DEFAULT "${tool_name}")
+function(get_host_tool_path tool_name setting_name exe_var_name target_var_name)
+ set(${setting_name}_DEFAULT "")
if(LLVM_NATIVE_TOOL_DIR)
if(EXISTS "${LLVM_NATIVE_TOOL_DIR}/${tool_name}${LLVM_HOST_EXECUTABLE_SUFFIX}")
@@ -2435,11 +2443,11 @@ function(setup_host_tool tool_name setting_name exe_var_name target_var_name)
set(${setting_name} "${${setting_name}_DEFAULT}" CACHE
STRING "Host ${tool_name} executable. Saves building if cross-compiling.")
- if(NOT ${setting_name} STREQUAL "${tool_name}")
+ if(${setting_name})
set(exe_name ${${setting_name}})
- set(target_name ${${setting_name}})
+ set(target_name "")
elseif(LLVM_USE_HOST_TOOLS)
- build_native_tool(${tool_name} exe_name DEPENDS ${tool_name})
+ get_native_tool_path(${tool_name} exe_name)
set(target_name ${exe_name})
else()
set(exe_name $<TARGET_FILE:${tool_name}>)
@@ -2448,3 +2456,12 @@ function(setup_host_tool tool_name setting_name exe_var_name target_var_name)
set(${exe_var_name} "${exe_name}" CACHE STRING "")
set(${target_var_name} "${target_name}" CACHE STRING "")
endfunction()
+
+function(setup_host_tool tool_name setting_name exe_var_name target_var_name)
+ get_host_tool_path(${tool_name} ${setting_name} ${exe_var_name} ${target_var_name})
+ # Set up a native tool build if necessary
+ if(LLVM_USE_HOST_TOOLS AND NOT ${setting_name})
+ build_native_tool(${tool_name} exe_name DEPENDS ${tool_name})
+ add_custom_target(${target_var_name} DEPENDS ${exe_name})
+ endif()
+endfunction()
diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake
index 7c750f01e0cab..6af47b51d4c60 100644
--- a/llvm/cmake/modules/CrossCompile.cmake
+++ b/llvm/cmake/modules/CrossCompile.cmake
@@ -97,6 +97,15 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype)
endfunction()
+function(get_native_tool_path target output_path_var)
+ if(CMAKE_CONFIGURATION_TYPES)
+ set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/Release/bin/${target}")
+ else()
+ set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/bin/${target}")
+ endif()
+ set(${output_path_var} ${output_path}${LLVM_HOST_EXECUTABLE_SUFFIX} PARENT_SCOPE)
+endfunction()
+
# Sets up a native build for a tool, used e.g. for cross-compilation and
# LLVM_OPTIMIZED_TABLEGEN. Always builds in Release.
# - target: The target to build natively
@@ -105,12 +114,7 @@ endfunction()
function(build_native_tool target output_path_var)
cmake_parse_arguments(ARG "" "" "DEPENDS" ${ARGN})
- if(CMAKE_CONFIGURATION_TYPES)
- set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/Release/bin/${target}")
- else()
- set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/bin/${target}")
- endif()
- set(output_path ${output_path}${LLVM_HOST_EXECUTABLE_SUFFIX})
+ get_native_tool_path(${target} output_path)
# Make chain of preceding actions
if(CMAKE_GENERATOR MATCHES "Visual Studio")
diff --git a/llvm/tools/llvm-nm/CMakeLists.txt b/llvm/tools/llvm-nm/CMakeLists.txt
index cd69712e2f7c5..ec04f1e9d2343 100644
--- a/llvm/tools/llvm-nm/CMakeLists.txt
+++ b/llvm/tools/llvm-nm/CMakeLists.txt
@@ -25,6 +25,8 @@ add_llvm_tool(llvm-nm
GENERATE_DRIVER
)
+setup_host_tool(llvm-nm LLVM_NM llvm_nm_exe llvm_nm_target)
+
if(LLVM_INSTALL_BINUTILS_SYMLINKS)
add_llvm_tool_symlink(nm llvm-nm)
endif()
diff --git a/llvm/tools/llvm-readobj/CMakeLists.txt b/llvm/tools/llvm-readobj/CMakeLists.txt
index c49526bd7b2c9..0051f87b3c103 100644
--- a/llvm/tools/llvm-readobj/CMakeLists.txt
+++ b/llvm/tools/llvm-readobj/CMakeLists.txt
@@ -30,6 +30,8 @@ add_llvm_tool(llvm-readobj
GENERATE_DRIVER
)
+setup_host_tool(llvm-readobj LLVM_READOBJ llvm_readobj_exe llvm_readobj_target)
+
add_llvm_tool_symlink(llvm-readelf llvm-readobj)
if(LLVM_INSTALL_BINUTILS_SYMLINKS)
diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt
index 90e290435725c..4f6a2cbfbba30 100644
--- a/llvm/tools/llvm-shlib/CMakeLists.txt
+++ b/llvm/tools/llvm-shlib/CMakeLists.txt
@@ -166,21 +166,10 @@ if(LLVM_BUILD_LLVM_C_DYLIB AND MSVC)
set(GEN_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/gen-msvc-exports.py)
set(LLVM_EXPORTED_SYMBOL_FILE ${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/libllvm-c.exports)
- if(NOT LLVM_NM)
- if(CMAKE_CROSSCOMPILING)
- build_native_tool(llvm-nm llvm_nm)
- set(llvm_nm_target "${llvm_nm}")
- else()
- set(llvm_nm $<TARGET_FILE:llvm-nm>)
- set(llvm_nm_target llvm-nm)
- endif()
- else()
- set(llvm_nm ${LLVM_NM})
- set(llvm_nm_target "")
- endif()
+ get_host_tool_path(llvm-nm LLVM_NM llvm_nm_exe llvm_nm_target)
add_custom_command(OUTPUT ${LLVM_EXPORTED_SYMBOL_FILE}
- COMMAND "${Python3_EXECUTABLE}" ${GEN_SCRIPT} --libsfile ${LIBSFILE} ${GEN_UNDERSCORE} --nm "${llvm_nm}" -o ${LLVM_EXPORTED_SYMBOL_FILE}
+ COMMAND "${Python3_EXECUTABLE}" ${GEN_SCRIPT} --libsfile ${LIBSFILE} ${GEN_UNDERSCORE} --nm "${llvm_nm_exe}" -o ${LLVM_EXPORTED_SYMBOL_FILE}
DEPENDS ${LIB_NAMES} ${llvm_nm_target}
COMMENT "Generating export list for LLVM-C"
VERBATIM )
diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py
index 7ec27b3f249ec..a2eabd3ab4f4c 100755
--- a/llvm/utils/extract_symbols.py
+++ b/llvm/utils/extract_symbols.py
@@ -23,30 +23,20 @@
import multiprocessing
import argparse
-# Define functions which extract a list of pairs of (symbols, is_def) from a
-# library using several different tools. We use subprocess.Popen and yield a
-# symbol at a time instead of using subprocess.check_output and returning a list
-# as, especially on Windows, waiting for the entire output to be ready can take
-# a significant amount of time.
-
-def dumpbin_get_symbols(lib):
- process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
- stdout=subprocess.PIPE, stdin=subprocess.PIPE,
- universal_newlines=True)
- process.stdin.close()
- for line in process.stdout:
- # Look for external symbols
- match = re.match("^.+(SECT|UNDEF).+External\s+\|\s+(\S+).*$", line)
- if match:
- yield (match.group(2), match.group(1) != "UNDEF")
- process.wait()
-
-def nm_get_symbols(lib):
- # -P means the output is in portable format, and -g means we only get global
- # symbols.
- cmd = ['nm','-P','-g']
- if sys.platform.startswith('aix'):
- cmd += ['-Xany','-C','-p']
+# Define a function which extracts a list of pairs of (symbols, is_def) from a
+# library using llvm-nm becuase it can work both with regular and bitcode files.
+# We use subprocess.Popen and yield a symbol at a time instead of using
+# subprocess.check_output and returning a list as, especially on Windows, waiting
+# for the entire output to be ready can take a significant amount of time.
+def nm_get_symbols(tool, lib):
+ # '-P' means the output is in portable format,
+ # '-g' means we only get global symbols,
+ # '-Xany' enforce handling both 32- and 64-bit objects on AIX,
+ # '--no-demangle' ensure that C++ symbol names are not demangled; note
+ # that llvm-nm do not demangle by default, but the system nm on AIX does
+ # that, so the behavior may change in the future,
+ # '-p' do not waste time sorting the symbols.
+ cmd = [tool,'-P','-g','-Xany','--no-demangle','-p']
process = subprocess.Popen(cmd+[lib], bufsize=1,
stdout=subprocess.PIPE, stdin=subprocess.PIPE,
universal_newlines=True)
@@ -68,61 +58,10 @@ def nm_get_symbols(lib):
yield (match.group(1), False)
process.wait()
-def readobj_get_symbols(lib):
- process = subprocess.Popen(['llvm-readobj','--symbols',lib], bufsize=1,
- stdout=subprocess.PIPE, stdin=subprocess.PIPE,
- universal_newlines=True)
- process.stdin.close()
- for line in process.stdout:
- # When looking through the output of llvm-readobj we expect to see Name,
- # Section, then StorageClass, so record Name and Section when we see
- # them and decide if this is an external symbol when we see
- # StorageClass.
- match = re.search('Name: (\S+)', line)
- if match:
- name = match.group(1)
- match = re.search('Section: (\S+)', line)
- if match:
- section = match.group(1)
- match = re.search('StorageClass: (\S+)', line)
- if match:
- storageclass = match.group(1)
- if section != 'IMAGE_SYM_ABSOLUTE' and \
- storageclass == 'External':
- yield (name, section != 'IMAGE_SYM_UNDEFINED')
- process.wait()
-
-# Define functions which determine if the target is 32-bit Windows (as that's
+# Define a function which determines if the target is 32-bit Windows (as that's
# where calling convention name decoration happens).
-
-def dumpbin_is_32bit_windows(lib):
- # dumpbin /headers can output a huge amount of data (>100MB in a debug
- # build) so we read only up to the 'machine' line then close the output.
- process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1,
- stdout=subprocess.PIPE, stdin=subprocess.PIPE,
- universal_newlines=True)
- process.stdin.close()
- retval = False
- for line in process.stdout:
- match = re.match('.+machine \((\S+)\)', line)
- if match:
- retval = (match.group(1) == 'x86')
- break
- process.stdout.close()
- process.wait()
- return retval
-
-def objdump_is_32bit_windows(lib):
- output = subprocess.check_output(['objdump','-f',lib],
- universal_newlines=True)
- for line in output.splitlines():
- match = re.match('.+file format (\S+)', line)
- if match:
- return (match.group(1) == 'pe-i386')
- return False
-
-def readobj_is_32bit_windows(lib):
- output = subprocess.check_output(['llvm-readobj','--file-header',lib],
+def readobj_is_32bit_windows(tool, lib):
+ output = subprocess.check_output([tool,'--file-header',lib],
universal_newlines=True)
for line in output.splitlines():
match = re.match('Format: (\S+)', line)
@@ -130,11 +69,6 @@ def readobj_is_32bit_windows(lib):
return (match.group(1) == 'COFF-i386')
return False
-# On AIX, there isn't an easy way to detect 32-bit windows objects with the system toolchain,
-# so just assume false.
-def aix_is_32bit_windows(lib):
- return False
-
# MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
# identifier/type mangling we can decide which symbols could possibly be
# required and which we can discard.
@@ -355,10 +289,10 @@ def parse_microsoft_mangling(arg):
return components
def extract_symbols(arg):
- get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg
+ llvm_nm_path, should_keep_symbol, calling_convention_decoration, lib = arg
symbol_defs = dict()
symbol_refs = set()
- for (symbol, is_def) in get_symbols(lib):
+ for (symbol, is_def) in nm_get_symbols(llvm_nm_path, lib):
symbol = should_keep_symbol(symbol, calling_convention_decoration)
if symbol:
if is_def:
@@ -392,63 +326,38 @@ def get_template_name(sym, mangling):
# Not a template
return None
+def parse_tool_path(parser, tool, val):
+ try:
+ # Close std streams as we don't want any output and we don't
+ # want the process to wait for something on stdin.
+ p = subprocess.Popen([val], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ universal_newlines=True)
+ p.stdout.close()
+ p.stderr.close()
+ p.stdin.close()
+ p.wait()
+ return val
+ except Exception:
+ parser.error(f'Invalid path for {tool}')
+
if __name__ == '__main__':
- tool_exes = ['dumpbin','nm','objdump','llvm-readobj']
parser = argparse.ArgumentParser(
description='Extract symbols to export from libraries')
parser.add_argument('--mangling', choices=['itanium','microsoft'],
required=True, help='expected symbol mangling scheme')
- parser.add_argument('--tools', choices=tool_exes, nargs='*',
- help='tools to use to extract symbols and determine the'
- ' target')
+ parser.add_argument('--nm', metavar='path',
+ type=lambda x: parse_tool_path(parser, 'nm', x),
+ help='path to the llvm-nm executable')
+ parser.add_argument('--readobj', metavar='path',
+ type=lambda x: parse_tool_path(parser, 'readobj', x),
+ help='path to the llvm-readobj executable')
parser.add_argument('libs', metavar='lib', type=str, nargs='+',
help='libraries to extract symbols from')
parser.add_argument('-o', metavar='file', type=str, help='output to file')
args = parser.parse_args()
- # Determine the function to use to get the list of symbols from the inputs,
- # and the function to use to determine if the target is 32-bit windows.
- tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows),
- 'nm' : (nm_get_symbols, None),
- 'objdump' : (None, objdump_is_32bit_windows),
- 'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) }
- get_symbols = None
- is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None
- # If we have a tools argument then use that for the list of tools to check
- if args.tools:
- tool_exes = args.tools
- # Find a tool to use by trying each in turn until we find one that exists
- # (subprocess.call will throw OSError when the program does not exist)
- get_symbols = None
- for exe in tool_exes:
- try:
- # Close std streams as we don't want any output and we don't
- # want the process to wait for something on stdin.
- p = subprocess.Popen([exe], stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- stdin=subprocess.PIPE,
- universal_newlines=True)
- p.stdout.close()
- p.stderr.close()
- p.stdin.close()
- p.wait()
- # Keep going until we have a tool to use for both get_symbols and
- # is_32bit_windows
- if not get_symbols:
- get_symbols = tools[exe][0]
- if not is_32bit_windows:
- is_32bit_windows = tools[exe][1]
- if get_symbols and is_32bit_windows:
- break
- except OSError:
- continue
- if not get_symbols:
- print("Couldn't find a program to read symbols with", file=sys.stderr)
- exit(1)
- if not is_32bit_windows:
- print("Couldn't find a program to determining the target", file=sys.stderr)
- exit(1)
-
# How we determine which symbols to keep and which to discard depends on
# the mangling scheme
if args.mangling == 'microsoft':
@@ -478,7 +387,7 @@ def get_template_name(sym, mangling):
# Check if calling convention decoration is used by inspecting the first
# library in the list
- calling_convention_decoration = is_32bit_windows(libs[0])
+ calling_convention_decoration = readobj_is_32bit_windows(args.readobj, libs[0])
# Extract symbols from libraries in parallel. This is a huge time saver when
# doing a debug build, as there are hundreds of thousands of symbols in each
@@ -489,7 +398,7 @@ def get_template_name(sym, mangling):
# use a lambda or local function definition as that doesn't work on
# windows, so create a list of tuples which duplicates the arguments
# that are the same in all calls.
- vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs]
+ vals = [(args.nm, should_keep_symbol, calling_convention_decoration, x) for x in libs]
# Do an async map then wait for the result to make sure that
# KeyboardInterrupt gets caught correctly (see
# http://bugs.python.org/issue8296)
More information about the llvm-commits
mailing list