r257934 - [CMake] Support generation of linker order files using dtrace
Sean Silva via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 21 21:21:11 PDT 2016
On Fri, Jan 15, 2016 at 1:21 PM, Chris Bieneman via cfe-commits <
cfe-commits at lists.llvm.org> wrote:
> Author: cbieneman
> Date: Fri Jan 15 15:21:12 2016
> New Revision: 257934
>
> URL: http://llvm.org/viewvc/llvm-project?rev=257934&view=rev
> Log:
> [CMake] Support generation of linker order files using dtrace
>
> Summary:
> This patch extends the lit-based perf-training tooling supplied for PGO
> data generation to also generate linker order files using dtrace.
>
> This patch should work on any system that has dtrace. If CMake can find
> the dtrace tool it will generate a target 'generate-order-file' which will
> run the per-training tests wrapped by dtrace to capture function entries.
> There are several algorithms implemented for sorting the order files which
> can be experimented with for best performance. The dtrace wrapper also
> supports bot oneshot and pid probes.
>
> The perf-helper.py changes to support order file construction are ported
> from internal changes by ddunbar; he gets all the credit for the hard work
> here, I just copy and pasted.
>
> Note: I've tested these patches on FreeBSD and OS X 10.10.
>
> Reviewers: ddunbar, bogner, silvas
>
> Subscribers: llvm-commits, emaste
>
> Differential Revision: http://reviews.llvm.org/D16134
>
> Added:
> cfe/trunk/utils/perf-training/order-files.lit.cfg
> cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
> Modified:
> cfe/trunk/utils/perf-training/CMakeLists.txt
> cfe/trunk/utils/perf-training/perf-helper.py
>
> Modified: cfe/trunk/utils/perf-training/CMakeLists.txt
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/perf-training/CMakeLists.txt?rev=257934&r1=257933&r2=257934&view=diff
>
> ==============================================================================
> --- cfe/trunk/utils/perf-training/CMakeLists.txt (original)
> +++ cfe/trunk/utils/perf-training/CMakeLists.txt Fri Jan 15 15:21:12 2016
> @@ -1,24 +1,24 @@
> -if(LLVM_BUILD_INSTRUMENTED)
> - if (CMAKE_CFG_INTDIR STREQUAL ".")
> - set(LLVM_BUILD_MODE ".")
> - else ()
> - set(LLVM_BUILD_MODE "%(build_mode)s")
> - endif ()
> +if (CMAKE_CFG_INTDIR STREQUAL ".")
> + set(LLVM_BUILD_MODE ".")
> +else ()
> + set(LLVM_BUILD_MODE "%(build_mode)s")
> +endif ()
>
> - string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR
> ${LLVM_RUNTIME_OUTPUT_INTDIR})
> +string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR
> ${LLVM_RUNTIME_OUTPUT_INTDIR})
>
> +if(LLVM_BUILD_INSTRUMENTED)
> configure_lit_site_cfg(
> ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
> - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
> + ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg
> )
>
> add_lit_testsuite(generate-profraw "Generating clang PGO data"
> - ${CMAKE_CURRENT_BINARY_DIR}
> + ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
> DEPENDS clang clear-profraw
> )
>
> add_custom_target(clear-profraw
> - COMMAND ${PYTHON_EXECUTABLE}
> ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR}
> + COMMAND ${PYTHON_EXECUTABLE}
> ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean
> ${CMAKE_CURRENT_BINARY_DIR} profraw
> COMMENT "Clearing old profraw data")
>
> if(NOT LLVM_PROFDATA)
> @@ -34,3 +34,26 @@ if(LLVM_BUILD_INSTRUMENTED)
> COMMENT "Merging profdata"
> DEPENDS generate-profraw)
> endif()
> +
> +find_program(DTRACE dtrace)
> +if(DTRACE)
> + configure_lit_site_cfg(
> + ${CMAKE_CURRENT_SOURCE_DIR}/order-files.lit.site.cfg.in
> + ${CMAKE_CURRENT_BINARY_DIR}/order-files/lit.site.cfg
> + )
> +
> + add_lit_testsuite(generate-dtrace-logs "Generating clang dtrace data"
> + ${CMAKE_CURRENT_BINARY_DIR}/order-files/
> + DEPENDS clang clear-dtrace-logs
> + )
> +
> + add_custom_target(clear-dtrace-logs
> + COMMAND ${PYTHON_EXECUTABLE}
> ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean
> ${CMAKE_CURRENT_BINARY_DIR} dtrace
> + COMMENT "Clearing old dtrace data")
> +
> +
> + add_custom_target(generate-order-file
> + COMMAND ${PYTHON_EXECUTABLE}
> ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py gen-order-file --binary
> $<TARGET_FILE:clang> --output ${CMAKE_CURRENT_BINARY_DIR}/clang.order
> ${CMAKE_CURRENT_BINARY_DIR}
> + COMMENT "Generating order file"
> + DEPENDS generate-dtrace-logs)
> +endif()
>
> Added: cfe/trunk/utils/perf-training/order-files.lit.cfg
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/perf-training/order-files.lit.cfg?rev=257934&view=auto
>
> ==============================================================================
> --- cfe/trunk/utils/perf-training/order-files.lit.cfg (added)
> +++ cfe/trunk/utils/perf-training/order-files.lit.cfg Fri Jan 15 15:21:12
> 2016
> @@ -0,0 +1,39 @@
> +# -*- Python -*-
> +
> +from lit import Test
> +import lit.formats
> +import lit.util
> +import os
> +
> +def getSysrootFlagsOnDarwin(config, lit_config):
> + # On Darwin, support relocatable SDKs by providing Clang with a
> + # default system root path.
> + if 'darwin' in config.target_triple:
> + try:
> + out = lit.util.capture(['xcrun', '--show-sdk-path']).strip()
> + res = 0
> + except OSError:
> + res = -1
> + if res == 0 and out:
> + sdk_path = out
> + lit_config.note('using SDKROOT: %r' % sdk_path)
> + return '-isysroot %s' % sdk_path
> + return ''
> +
> +sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config)
> +
> +config.clang = os.path.realpath(lit.util.which('clang',
> config.clang_tools_dir)).replace('\\', '/')
> +
> +config.name = 'Clang Perf Training'
> +config.suffixes = ['.c', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s',
> '.S', '.modulemap']
> +
> +dtrace_wrapper = '%s %s/perf-helper.py dtrace' % (config.python_exe,
> config.test_source_root)
> +
> +use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
> +config.test_format = lit.formats.ShTest(use_lit_shell == "0")
> +config.substitutions.append( ('%clang_cpp', ' %s %s --driver-mode=cpp %s
> ' % (dtrace_wrapper, config.clang, sysroot_flags)))
> +config.substitutions.append( ('%clang_cc1', ' %s %s -cc1 %s ' %
> (dtrace_wrapper, config.clang, sysroot_flags)))
> +config.substitutions.append( ('%clang', ' %s %s %s ' % (dtrace_wrapper,
> config.clang, sysroot_flags) ) )
> +config.substitutions.append( ('%test_root', config.test_exec_root ) )
> +
> +
>
> Added: cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in?rev=257934&view=auto
>
> ==============================================================================
> --- cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in (added)
> +++ cfe/trunk/utils/perf-training/order-files.lit.site.cfg.in Fri Jan 15
> 15:21:12 2016
> @@ -0,0 +1,21 @@
> +import sys
> +
> +## Autogenerated by LLVM/Clang configuration.
> +# Do not edit!
> +config.clang_tools_dir = "@CLANG_TOOLS_DIR@"
> +config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
> +config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@"
> +config.target_triple = "@TARGET_TRIPLE@"
> +config.python_exe = "@PYTHON_EXECUTABLE@"
> +
> +# Support substitution of the tools and libs dirs with user parameters.
> This is
> +# used when we can't determine the tool dir at configuration time.
> +try:
> + config.clang_tools_dir = config.clang_tools_dir % lit_config.params
> +except KeyError:
> + e = sys.exc_info()[1]
> + key, = e.args
> + lit_config.fatal("unable to find %r parameter, use
> '--param=%s=VALUE'" % (key,key))
> +
> +# Let the main config do the real work.
> +lit_config.load_config(config, "@CLANG_SOURCE_DIR@
> /utils/perf-training/order-files.lit.cfg")
>
> Modified: cfe/trunk/utils/perf-training/perf-helper.py
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/perf-training/perf-helper.py?rev=257934&r1=257933&r2=257934&view=diff
>
> ==============================================================================
> --- cfe/trunk/utils/perf-training/perf-helper.py (original)
> +++ cfe/trunk/utils/perf-training/perf-helper.py Fri Jan 15 15:21:12 2016
> @@ -10,33 +10,336 @@
> import sys
> import os
> import subprocess
> +import argparse
> +import time
> +import bisect
>
> -def findProfrawFiles(path):
> - profraw_files = []
> +def findFilesWithExtension(path, extension):
> + filenames = []
> for root, dirs, files in os.walk(path):
> for filename in files:
> - if filename.endswith(".profraw"):
> - profraw_files.append(os.path.join(root, filename))
> - return profraw_files
> + if filename.endswith(extension):
> + filenames.append(os.path.join(root, filename))
> + return filenames
>
> def clean(args):
> - if len(args) != 1:
> - print 'Usage: %s clean <path>\n\tRemoves all *.profraw files from
> <path>.' % __file__
> + if len(args) != 2:
> + print 'Usage: %s clean <path> <extension>' % __file__
> + print '\tRemoves all files with extension from <path>.'
> return 1
> - for profraw in findProfrawFiles(args[0]):
> - os.remove(profraw)
> + for filename in findFilesWithExtension(args[0], args[1]):
> + os.remove(filename)
> return 0
>
> def merge(args):
> if len(args) != 3:
> - print 'Usage: %s clean <llvm-profdata> <output> <path>\n\tMerges all
> profraw files from path into output.' % __file__
> + print 'Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__
> + print '\tMerges all profraw files from path into output.'
> return 1
> cmd = [args[0], 'merge', '-o', args[1]]
> - cmd.extend(findProfrawFiles(args[2]))
> + cmd.extend(findFilesWithExtension(args[2], "profraw"))
> subprocess.check_call(cmd)
> return 0
>
> -commands = {'clean' : clean, 'merge' : merge}
> +def dtrace(args):
> + parser = argparse.ArgumentParser(prog='perf-helper dtrace',
> + description='dtrace wrapper for order file generation')
> + parser.add_argument('--buffer-size', metavar='size', type=int,
> required=False,
> + default=1, help='dtrace buffer size in MB (default 1)')
> + parser.add_argument('--use-oneshot', required=False,
> action='store_true',
> + help='Use dtrace\'s oneshot probes')
> + parser.add_argument('--use-ustack', required=False, action='store_true',
> + help='Use dtrace\'s ustack to print function names')
> + parser.add_argument('cmd', nargs='*', help='')
> +
> + # Use python's arg parser to handle all leading option arguments, but
> pass
> + # everything else through to dtrace
> + first_cmd = next(arg for arg in args if not arg.startswith("--"))
> + last_arg_idx = args.index(first_cmd)
> +
> + opts = parser.parse_args(args[:last_arg_idx])
> + cmd = args[last_arg_idx:]
> +
> + if opts.use_oneshot:
> + target = "oneshot$target:::entry"
> + else:
> + target = "pid$target:::entry"
> + predicate = '%s/probemod=="%s"/' % (target, os.path.basename(args[0]))
> + log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
> + if opts.use_ustack:
> + action = 'ustack(1);'
> + else:
> + action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
> + dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)
> +
> + dtrace_args = []
> + if not os.geteuid() == 0:
> + print 'Script must be run as root, or you must add the following to
> your sudoers:'
> + print '%%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace'
> + dtrace_args.append("sudo")
> +
> + dtrace_args.extend((
> + 'dtrace', '-xevaltime=exec',
> + '-xbufsize=%dm' % (opts.buffer_size),
> + '-q', '-n', dtrace_script,
> + '-c', ' '.join(cmd)))
> +
> + if sys.platform == "darwin":
> + dtrace_args.append('-xmangled')
> +
> + f = open("%d.dtrace" % os.getpid(), "w")
>
Please use `with` for this file like you do below.
> + start_time = time.time()
> + subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)
> + elapsed = time.time() - start_time
> + print "... data collection took %.4fs" % elapsed
> +
> + return 0
> +
> +def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
> + missing_symbols, opts):
> + def fix_mangling(symbol):
> + if sys.platform == "darwin":
> + if symbol[0] != '_' and symbol != 'start':
> + symbol = '_' + symbol
> + return symbol
> +
> + def get_symbols_with_prefix(symbol):
> + start_index = bisect.bisect_left(all_symbols, symbol)
> + for s in all_symbols[start_index:]:
> + if not s.startswith(symbol):
> + break
> + yield s
> +
> + # Extract the list of symbols from the given file, which is assumed to
> be
> + # the output of a dtrace run logging either probefunc or ustack(1) and
> + # nothing else. The dtrace -xdemangle option needs to be used.
> + #
> + # This is particular to OS X at the moment, because of the '_' handling.
> + with open(path) as f:
> + current_timestamp = None
> + for ln in f:
> + # Drop leading and trailing whitespace.
> + ln = ln.strip()
> + if not ln.startswith("dtrace-"):
> + continue
> +
> + # If this is a timestamp specifier, extract it.
> + if ln.startswith("dtrace-TS: "):
> + _,data = ln.split(': ', 1)
> + if not data.isdigit():
> + print >>sys.stderr, (
> + "warning: unrecognized timestamp line %r, ignoring" % ln)
> + continue
> + current_timestamp = int(data)
> + continue
> + elif ln.startswith("dtrace-Symbol: "):
> +
> + _,ln = ln.split(': ', 1)
> + if not ln:
> + continue
> +
> + # If there is a '`' in the line, assume it is a ustack(1) entry in
> + # the form of <modulename>`<modulefunc>, where <modulefunc> is
> never
> + # truncated (but does need the mangling patched).
> + if '`' in ln:
> + yield (current_timestamp, fix_mangling(ln.split('`',1)[1]))
> + continue
> +
> + # Otherwise, assume this is a probefunc printout. DTrace on OS X
> + # seems to have a bug where it prints the mangled version of
> symbols
> + # which aren't C++ mangled. We just add a '_' to anything but
> start
> + # which doesn't already have a '_'.
> + symbol = fix_mangling(ln)
> +
> + # If we don't know all the symbols, or the symbol is one of them,
> + # just return it.
> + if not all_symbols_set or symbol in all_symbols_set:
> + yield (current_timestamp, symbol)
> + continue
> +
> + # Otherwise, we have a symbol name which isn't present in the
> + # binary. We assume it is truncated, and try to extend it.
> +
> + # Get all the symbols with this prefix.
> + possible_symbols = list(get_symbols_with_prefix(symbol))
> + if not possible_symbols:
> + continue
> +
> + # If we found too many possible symbols, ignore this as a prefix.
> + if len(possible_symbols) > 100:
> + print >>sys.stderr, (
> + "warning: ignoring symbol %r " % symbol +
> + "(no match and too many possible suffixes)")
>
Please add `from __future__ import print_function` at the top of the file.
Then this becomes `print(..., file=sys.stderr)` which is much more readable.
> + continue
> +
> + # Report that we resolved a missing symbol.
> + if opts.show_missing_symbols and symbol not in missing_symbols:
> + print >>sys.stderr, ( "warning: resolved missing symbol %r" %
> symbol)
> + missing_symbols.add(symbol)
> +
> + # Otherwise, treat all the possible matches as having occurred.
> This
> + # is an over-approximation, but it should be ok in practice.
> + for s in possible_symbols:
> + yield (current_timestamp, s)
> +
> +def check_output(*popen_args, **popen_kwargs):
> + p = subprocess.Popen(stdout=subprocess.PIPE, *popen_args,
> **popen_kwargs)
> + stdout,stderr = p.communicate()
> + if p.wait() != 0:
> + raise RuntimeError("process failed")
> + return stdout
>
Any reason you are defining this function which is named the same as the
one in the subprocess module? Maybe add a comment?
-- Sean Silva
> +
> +def uniq(list):
> + seen = set()
> + for item in list:
> + if item not in seen:
> + yield item
> + seen.add(item)
> +
> +def form_by_call_order(symbol_lists):
> + # Simply strategy, just return symbols in order of occurrence, even
> across
> + # multiple runs.
> + return uniq(s for symbols in symbol_lists for s in symbols)
> +
> +def form_by_call_order_fair(symbol_lists):
> + # More complicated strategy that tries to respect the call order across
> all
> + # of the test cases, instead of giving a huge preference to the first
> test
> + # case.
> +
> + # First, uniq all the lists.
> + uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]
> +
> + # Compute the successors for each list.
> + succs = {}
> + for symbols in uniq_lists:
> + for a,b in zip(symbols[:-1], symbols[1:]):
> + succs[a] = items = succs.get(a, [])
> + if b not in items:
> + items.append(b)
> +
> + # Emit all the symbols, but make sure to always emit all successors
> from any
> + # call list whenever we see a symbol.
> + #
> + # There isn't much science here, but this sometimes works better than
> the
> + # more naive strategy. Then again, sometimes it doesn't so more
> research is
> + # probably needed.
> + return uniq(s
> + for symbols in symbol_lists
> + for node in symbols
> + for s in ([node] + succs.get(node,[])))
> +
> +def form_by_frequency(symbol_lists):
> + # Form the order file by just putting the most commonly occurring
> symbols
> + # first. This assumes the data files didn't use the oneshot dtrace
> method.
> +
> + counts = {}
> + for symbols in symbol_lists:
> + for a in symbols:
> + counts[a] = counts.get(a,0) + 1
> +
> + by_count = counts.items()
> + by_count.sort(key = lambda (_,n): -n)
> + return [s for s,n in by_count]
> +
> +def form_by_random(symbol_lists):
> + # Randomize the symbols.
> + merged_symbols = uniq(s for symbols in symbol_lists
> + for s in symbols)
> + random.shuffle(merged_symbols)
> + return merged_symbols
> +
> +def form_by_alphabetical(symbol_lists):
> + # Alphabetize the symbols.
> + merged_symbols = list(set(s for symbols in symbol_lists for s in
> symbols))
> + merged_symbols.sort()
> + return merged_symbols
> +
> +methods = dict((name[len("form_by_"):],value)
> + for name,value in locals().items() if name.startswith("form_by_"))
> +
> +def genOrderFile(args):
> + parser = argparse.ArgumentParser(
> + "%prog [options] <dtrace data file directories>]")
> + parser.add_argument('input', nargs='+', help='')
> + parser.add_argument("--binary", metavar="PATH", type=str,
> dest="binary_path",
> + help="Path to the binary being ordered (for getting all symbols)",
> + default=None)
> + parser.add_argument("--output", dest="output_path",
> + help="path to output order file to write", default=None,
> required=True,
> + metavar="PATH")
> + parser.add_argument("--show-missing-symbols",
> dest="show_missing_symbols",
> + help="show symbols which are 'fixed up' to a valid name (requires
> --binary)",
> + action="store_true", default=None)
> + parser.add_argument("--output-unordered-symbols",
> + dest="output_unordered_symbols_path",
> + help="write a list of the unordered symbols to PATH (requires
> --binary)",
> + default=None, metavar="PATH")
> + parser.add_argument("--method", dest="method",
> + help="order file generation method to use", choices=methods.keys(),
> + default='call_order')
> + opts = parser.parse_args(args)
> +
> + # If the user gave us a binary, get all the symbols in the binary by
> + # snarfing 'nm' output.
> + if opts.binary_path is not None:
> + output = check_output(['nm', '-P', opts.binary_path])
> + lines = output.split("\n")
> + all_symbols = [ln.split(' ',1)[0]
> + for ln in lines
> + if ln.strip()]
> + print "found %d symbols in binary" % len(all_symbols)
> + all_symbols.sort()
> + else:
> + all_symbols = []
> + all_symbols_set = set(all_symbols)
> +
> + # Compute the list of input files.
> + input_files = []
> + for dirname in opts.input:
> + input_files.extend(findFilesWithExtension(dirname, "dtrace"))
> +
> + # Load all of the input files.
> + print "loading from %d data files" % len(input_files)
> + missing_symbols = set()
> + timestamped_symbol_lists = [
> + list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
> + missing_symbols, opts))
> + for path in input_files]
> +
> + # Reorder each symbol list.
> + symbol_lists = []
> + for timestamped_symbols_list in timestamped_symbol_lists:
> + timestamped_symbols_list.sort()
> + symbol_lists.append([symbol for _,symbol in timestamped_symbols_list])
> +
> + # Execute the desire order file generation method.
> + method = methods.get(opts.method)
> + result = list(method(symbol_lists))
> +
> + # Report to the user on what percentage of symbols are present in the
> order
> + # file.
> + num_ordered_symbols = len(result)
> + if all_symbols:
> + print >>sys.stderr, "note: order file contains %d/%d symbols
> (%.2f%%)" % (
> + num_ordered_symbols, len(all_symbols),
> + 100.*num_ordered_symbols/len(all_symbols))
> +
> + if opts.output_unordered_symbols_path:
> + ordered_symbols_set = set(result)
> + with open(opts.output_unordered_symbols_path, 'w') as f:
> + f.write("\n".join(s for s in all_symbols if s not in
> ordered_symbols_set))
> +
> + # Write the order file.
> + with open(opts.output_path, 'w') as f:
> + f.write("\n".join(result))
> + f.write("\n")
> +
> + return 0
> +
> +commands = {'clean' : clean,
> + 'merge' : merge,
> + 'dtrace' : dtrace,
> + 'gen-order-file' : genOrderFile}
>
> def main():
> f = commands[sys.argv[1]]
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160321/88a425d0/attachment-0001.html>
More information about the cfe-commits
mailing list