r182596 - clang-format integration for git.
Nico Weber
thakis at chromium.org
Fri Jun 21 14:00:12 PDT 2013
I just had the chance to try this. One question below.
On Thu, May 23, 2013 at 10:53 AM, Daniel Jasper <djasper at google.com> wrote:
> Author: djasper
> Date: Thu May 23 12:53:42 2013
> New Revision: 182596
>
> URL: http://llvm.org/viewvc/llvm-project?rev=182596&view=rev
> Log:
> clang-format integration for git.
>
> Put this somewhere on your path and use:
>
> git clang-format
>
> Awesome work by Mark Lodato. Many thanks!
>
> Added:
> cfe/trunk/tools/clang-format/git-clang-format (with props)
>
> Added: cfe/trunk/tools/clang-format/git-clang-format
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/git-clang-format?rev=182596&view=auto
>
> ==============================================================================
> --- cfe/trunk/tools/clang-format/git-clang-format (added)
> +++ cfe/trunk/tools/clang-format/git-clang-format Thu May 23 12:53:42 2013
> @@ -0,0 +1,513 @@
> +#!/usr/bin/python
> +#
> +#===- git-clang-format - ClangFormat Git Integration ---------*- python
> -*--===#
> +#
> +# The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
>
> +#===------------------------------------------------------------------------===#
> +
> +r"""
> +clang-format git integration
> +============================
> +
> +This file provides a clang-format integration for git. Put it somewhere
> in your
> +path and ensure that it is executable. Then, "git clang-format" will
> invoke
> +clang-format on the changes in current files or a specific commit.
> +
> +For further details, run:
> +git clang-format -h
> +
> +Requires Python 2.7
> +"""
> +
> +import argparse
> +import collections
> +import contextlib
> +import errno
> +import os
> +import re
> +import subprocess
> +import sys
> +
> +usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
> +
> +desc = '''
> +Run clang-format on all lines that differ between the working directory
> +and <commit>, which defaults to HEAD. Changes are only applied to the
> working
> +directory.
> +
> +The following git-config settings set the default of the corresponding
> option:
> + clangFormat.binary
> + clangFormat.commit
> + clangFormat.extension
> + clangFormat.style
> +'''
> +
> +# Name of the temporary index file in which to save the output of
> clang-format.
> +# This file is created within the .git directory.
> +temp_index_basename = 'clang-format-index'
> +
> +
> +Range = collections.namedtuple('Range', 'start, count')
> +
> +
> +def main():
> + config = load_git_config()
> +
> + # In order to keep '--' yet allow options after positionals, we need to
> + # check for '--' ourselves. (Setting nargs='*' throws away the '--',
> while
> + # nargs=argparse.REMAINDER disallows options after positionals.)
> + argv = sys.argv[1:]
> + try:
> + idx = argv.index('--')
> + except ValueError:
> + dash_dash = []
> + else:
> + dash_dash = argv[idx:]
> + argv = argv[:idx]
> +
> + default_extensions = ','.join([
> + # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
> + 'c', 'h', # C
> + 'm', # ObjC
> + 'mm', # ObjC++
> + 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp', # C++
> + ])
> +
> + p = argparse.ArgumentParser(
> + usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
> + description=desc)
> + p.add_argument('--binary',
> + default=config.get('clangformat.binary', 'clang-format'),
> + help='path to clang-format'),
> + p.add_argument('--commit',
> + default=config.get('clangformat.commit', 'HEAD'),
> + help='default commit to use if none is specified'),
> + p.add_argument('--diff', action='store_true',
> + help='print a diff instead of applying the changes')
> + p.add_argument('--extensions',
> + default=config.get('clangformat.extensions',
> + default_extensions),
> + help=('comma-separated list of file extensions to
> format, '
> + 'excluding the period and case-insensitive')),
> + p.add_argument('-f', '--force', action='store_true',
> + help='allow changes to unstaged files')
> + p.add_argument('-p', '--patch', action='store_true',
> + help='select hunks interactively')
> + p.add_argument('-q', '--quiet', action='count', default=0,
> + help='print less information')
> + p.add_argument('--style',
> + default=config.get('clangformat.style', None),
> + help='passed to clang-format'),
> + p.add_argument('-v', '--verbose', action='count', default=0,
> + help='print extra information')
> + # We gather all the remaining positional arguments into 'args' since we
> need
> + # to use some heuristics to determine whether or not <commit> was
> present.
> + # However, to print pretty messages, we make use of metavar and help.
> + p.add_argument('args', nargs='*', metavar='<commit>',
> + help='revision from which to compute the diff')
> + p.add_argument('ignored', nargs='*', metavar='<file>...',
> + help='if specified, only consider differences in these
> files')
> + opts = p.parse_args(argv)
> +
> + opts.verbose -= opts.quiet
> + del opts.quiet
> +
> + commit, files = interpret_args(opts.args, dash_dash, opts.commit)
> + changed_lines = compute_diff_and_extract_lines(commit, files)
> + if opts.verbose >= 1:
> + ignored_files = set(changed_lines)
> + filter_by_extension(changed_lines, opts.extensions.lower().split(','))
> + if opts.verbose >= 1:
> + ignored_files.difference_update(changed_lines)
> + if ignored_files:
> + print 'Ignoring changes in the following files (wrong extension):'
> + for filename in ignored_files:
> + print ' ', filename
> + if changed_lines:
> + print 'Running clang-format on the following files:'
> + for filename in changed_lines:
> + print ' ', filename
> + if not changed_lines:
> + print 'no modified files to format'
> + return
> + # The computed diff outputs absolute paths, so we must cd before
> accessing
> + # those files.
> + cd_to_toplevel()
> + changed_bytes = lines_to_bytes(changed_lines)
> + old_tree = create_tree_from_workdir(changed_bytes)
> + new_tree = run_clang_format_and_save_to_tree(changed_bytes,
> + binary=opts.binary,
> + style=opts.style)
> + if opts.verbose >= 1:
> + print 'old tree:', old_tree
> + print 'new tree:', new_tree
> + if old_tree == new_tree:
> + if opts.verbose >= 0:
> + print 'clang-format did not modify any files'
> + elif opts.diff:
> + print_diff(old_tree, new_tree)
> + else:
> + changed_files = apply_changes(old_tree, new_tree, force=opts.force,
> + patch_mode=opts.patch)
> + if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
> + print 'changed files:'
> + for filename in changed_files:
> + print ' ', filename
> +
> +
> +def load_git_config(non_string_options=None):
> + """Return the git configuration as a dictionary.
> +
> + All options are assumed to be strings unless in `non_string_options`,
> which
> + is a dictionary mapping option name (in lower case) to either "--bool"
> or
> + "--int"."""
> + if non_string_options is None:
> + non_string_options = {}
> + out = {}
> + for entry in run('git', 'config', '--list', '--null').split('\0'):
> + if entry:
> + name, value = entry.split('\n', 1)
> + if name in non_string_options:
> + value = run('git', 'config', non_string_options[name], name)
> + out[name] = value
> + return out
> +
> +
> +def interpret_args(args, dash_dash, default_commit):
> + """Interpret `args` as "[commit] [--] [files...]" and return (commit,
> files).
> +
> + It is assumed that "--" and everything that follows has been removed
> from
> + args and placed in `dash_dash`.
> +
> + If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
> + left (if present) is taken as commit. Otherwise, the first argument is
> + checked if it is a commit or a file. If commit is not given,
> + `default_commit` is used."""
> + if dash_dash:
> + if len(args) == 0:
> + commit = default_commit
> + elif len(args) > 1:
> + die('at most one commit allowed; %d given' % len(args))
> + else:
> + commit = args[0]
> + object_type = get_object_type(commit)
> + if object_type not in ('commit', 'tag'):
> + if object_type is None:
> + die("'%s' is not a commit" % commit)
> + else:
> + die("'%s' is a %s, but a commit was expected" % (commit,
> object_type))
> + files = dash_dash[1:]
> + elif args:
> + if disambiguate_revision(args[0]):
> + commit = args[0]
> + files = args[1:]
> + else:
> + commit = default_commit
> + files = args
> + else:
> + commit = default_commit
> + files = []
> + return commit, files
> +
> +
> +def disambiguate_revision(value):
> + """Returns True if `value` is a revision, False if it is a file, or
> dies."""
> + # If `value` is ambiguous (neither a commit nor a file), the following
> + # command will die with an appropriate error message.
> + run('git', 'rev-parse', value, verbose=False)
> + object_type = get_object_type(value)
> + if object_type is None:
> + return False
> + if object_type in ('commit', 'tag'):
> + return True
> + die('`%s` is a %s, but a commit or filename was expected' %
> + (value, object_type))
> +
> +
> +def get_object_type(value):
> + """Returns a string description of an object's type, or None if it is
> not
> + a valid git object."""
> + cmd = ['git', 'cat-file', '-t', value]
> + p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
> stderr=subprocess.PIPE)
> + stdout, stderr = p.communicate()
> + if p.returncode != 0:
> + return None
> + return stdout.strip()
> +
> +
> +def compute_diff_and_extract_lines(commit, files):
> + """Calls compute_diff() followed by extract_lines()."""
> + diff_process = compute_diff(commit, files)
> + changed_lines = extract_lines(diff_process.stdout)
> + diff_process.stdout.close()
> + diff_process.wait()
> + if diff_process.returncode != 0:
> + # Assume error was already printed to stderr.
> + sys.exit(2)
> + return changed_lines
> +
> +
> +def compute_diff(commit, files):
> + """Return a subprocess object producing the diff from `commit`.
> +
> + The return value's `stdout` file object will produce a patch with the
> + differences between the working directory and `commit`, filtered on
> `files`
> + (if non-empty). Zero context lines are used in the patch."""
> + cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
> + cmd.extend(files)
> + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
> + p.stdin.close()
> + return p
> +
> +
> +def extract_lines(patch_file):
> + """Extract the changed lines in `patch_file`.
> +
> + The input must have been produced with ``-U0``, meaning unidiff format
> with
> + zero lines of context. The return value is a dict mapping filename to a
> + list of line `Range`s."""
> + matches = {}
> + for line in patch_file:
> + match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
> + if match:
> + filename = match.group(1).rstrip('\r\n')
> + match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
> + if match:
> + start_line = int(match.group(1))
> + line_count = 1
> + if match.group(3):
> + line_count = int(match.group(3))
> + if line_count > 0:
> + matches.setdefault(filename, []).append(Range(start_line,
> line_count))
> + return matches
> +
> +
> +def filter_by_extension(dictionary, allowed_extensions):
> + """Delete every key in `dictionary` that doesn't have an allowed
> extension.
> +
> + `allowed_extensions` must be a collection of lowercase file extensions,
> + excluding the period."""
> + allowed_extensions = frozenset(allowed_extensions)
> + for filename in dictionary.keys():
> + base_ext = filename.rsplit('.', 1)
> + if len(base_ext) == 1 or base_ext[1].lower() not in
> allowed_extensions:
> + del dictionary[filename]
> +
> +
> +def cd_to_toplevel():
> + """Change to the top level of the git repository."""
> + toplevel = run('git', 'rev-parse', '--show-toplevel')
> + os.chdir(toplevel)
> +
> +
> +def lines_to_bytes(changed_lines):
> + """Convert the mapping of changed line ranges to changed byte ranges.
> +
> + This function opens each file to compute the byte ranges."""
> + changed_bytes = {}
> + for filename, line_ranges in changed_lines.iteritems():
> + with open(filename) as f:
> + changed_bytes[filename] = lines_to_bytes_single_file(f, line_ranges)
> + return changed_bytes
> +
> +
> +def lines_to_bytes_single_file(file, line_ranges):
> + byte_ranges = []
> + line_ranges_iter = iter(line_ranges + [None])
> + r = next(line_ranges_iter)
> + linenum = 1
> + byte_idx = 0
> + byte_start = None
> + byte_count = None
> + for line in file:
> + if r is None:
> + break
> + if linenum == r.start:
> + byte_start = byte_idx
> + if linenum == r.start + r.count:
> + byte_ranges.append(Range(byte_start, byte_idx - byte_start))
>
^ Is this correct? At this point byte_idx is already the offset of the
start of the line *after* the changed range, so the --length that's passed
to clang-format will include the trailing '\n' of the last changed line,
which apparently causes clang-format to format the next line as well. I'm
not sure whether that's a bug in this script, in clang-format, or both.
Here's an example where this caused formatting on a line that wasn't
touched by a patch:
https://codereview.chromium.org/16917011/diff/1/chrome/browser/search_engines/template_url_prepopulate_data.cc#newcode622
> + r = next(line_ranges_iter)
> + linenum += 1
> + byte_idx += len(line)
> + if r is not None:
> + # FIXME: Detect and warn if line ranges go past the end of file?
> + byte_ranges.append(Range(byte_start, byte_idx - byte_start))
> + return byte_ranges
> +
> +
> +def create_tree_from_workdir(filenames):
> + """Create a new git tree with the given files from the working
> directory.
> +
> + Returns the object ID (SHA-1) of the created tree."""
> + return create_tree(filenames, '--stdin')
> +
> +
> +def run_clang_format_and_save_to_tree(changed_bytes,
> binary='clang-format',
> + style=None):
> + """Run clang-format on each file and save the result to a git tree.
> +
> + Returns the object ID (SHA-1) of the created tree."""
> + def index_info_generator():
> + for filename, byte_ranges in changed_bytes.iteritems():
> + mode = oct(os.stat(filename).st_mode)
> + blob_id = clang_format_to_blob(filename, byte_ranges, binary=binary,
> + style=style)
> + yield '%s %s\t%s' % (mode, blob_id, filename)
> + return create_tree(index_info_generator(), '--index-info')
> +
> +
> +def create_tree(input_lines, mode):
> + """Create a tree object from the given input.
> +
> + If mode is '--stdin', `input_lines` must be a list of filenames. If mode is
> + '--index-info', it must be a list of values suitable for "git
> update-index
> + --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any
> other mode
> + is invalid."""
> + assert mode in ('--stdin', '--index-info')
> + cmd = ['git', 'update-index', '--add', '-z', mode]
> + with temporary_index_file():
> + p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
> + for line in input_lines:
> + p.stdin.write('%s\0' % line)
> + p.stdin.close()
> + if p.wait() != 0:
> + die('`%s` failed' % ' '.join(cmd))
> + tree_id = run('git', 'write-tree')
> + return tree_id
> +
> +
> +def clang_format_to_blob(filename, byte_ranges, binary='clang-format',
> + style=None):
> + """Run clang-format on the given file and save the result to a git blob.
> +
> + Returns the object ID (SHA-1) of the created blob."""
> + clang_format_cmd = [binary, filename]
> + if style:
> + clang_format_cmd.extend(['-style='+style])
> + for offset, length in byte_ranges:
> + clang_format_cmd.extend(['-offset='+str(offset),
> '-length='+str(length)])
> + try:
> + clang_format = subprocess.Popen(clang_format_cmd,
> stdin=subprocess.PIPE,
> + stdout=subprocess.PIPE)
> + except OSError as e:
> + if e.errno == errno.ENOENT:
> + die('cannot find executable "%s"' % binary)
> + else:
> + raise
> + clang_format.stdin.close()
> + hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename,
> '--stdin']
> + hash_object = subprocess.Popen(hash_object_cmd,
> stdin=clang_format.stdout,
> + stdout=subprocess.PIPE)
> + clang_format.stdout.close()
> + stdout = hash_object.communicate()[0]
> + if hash_object.returncode != 0:
> + die('`%s` failed' % ' '.join(hash_object_cmd))
> + if clang_format.wait() != 0:
> + die('`%s` failed' % ' '.join(clang_format_cmd))
> + return stdout.rstrip('\r\n')
> +
> +
> + at contextlib.contextmanager
> +def temporary_index_file(tree=None):
> + """Context manager for setting GIT_INDEX_FILE to a temporary file and
> deleting
> + the file afterward."""
> + index_path = create_temporary_index(tree)
> + old_index_path = os.environ.get('GIT_INDEX_FILE')
> + os.environ['GIT_INDEX_FILE'] = index_path
> + try:
> + yield
> + finally:
> + if old_index_path is None:
> + del os.environ['GIT_INDEX_FILE']
> + else:
> + os.environ['GIT_INDEX_FILE'] = old_index_path
> + os.remove(index_path)
> +
> +
> +def create_temporary_index(tree=None):
> + """Create a temporary index file and return the created file's path.
> +
> + If `tree` is not None, use that as the tree to read in. Otherwise, an
> + empty index is created."""
> + gitdir = run('git', 'rev-parse', '--git-dir')
> + path = os.path.join(gitdir, temp_index_basename)
> + if tree is None:
> + tree = '--empty'
> + run('git', 'read-tree', '--index-output='+path, tree)
> + return path
> +
> +
> +def print_diff(old_tree, new_tree):
> + """Print the diff between the two trees to stdout."""
> + # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
> output
> + # is expected to be viewed by the user, and only the former does nice
> things
> + # like color and pagination.
> + subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
> +
> +
> +def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
> + """Apply the changes in `new_tree` to the working directory.
> +
> + Bails if there are local changes in those files and not `force`. If
> + `patch_mode`, runs `git checkout --patch` to select hunks
> interactively."""
> + changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only',
> old_tree,
> + new_tree).rstrip('\0').split('\0')
> + if not force:
> + unstaged_files = run('git', 'diff-files', '--name-status',
> *changed_files)
> + if unstaged_files:
> + print >>sys.stderr, ('The following files would be modified but '
> + 'have unstaged changes:')
> + print >>sys.stderr, unstaged_files
> + print >>sys.stderr, 'Please commit, stage, or stash them first.'
> + sys.exit(2)
> + if patch_mode:
> + # In patch mode, we could just as well create an index from the new
> tree
> + # and checkout from that, but then the user will be presented with a
> + # message saying "Discard ... from worktree". Instead, we use the old
> + # tree as the index and checkout from new_tree, which gives the
> slightly
> + # better message, "Apply ... to index and worktree". This is not
> quite
> + # right, since it won't be applied to the user's index, but oh well.
> + with temporary_index_file(old_tree):
> + subprocess.check_call(['git', 'checkout', '--patch', new_tree])
> + index_tree = old_tree
> + else:
> + with temporary_index_file(new_tree):
> + run('git', 'checkout-index', '-a', '-f')
> + return changed_files
> +
> +
> +def run(*args, **kwargs):
> + stdin = kwargs.pop('stdin', '')
> + verbose = kwargs.pop('verbose', True)
> + strip = kwargs.pop('strip', True)
> + for name in kwargs:
> + raise TypeError("run() got an unexpected keyword argument '%s'" %
> name)
> + p = subprocess.Popen(args, stdout=subprocess.PIPE,
> stderr=subprocess.PIPE,
> + stdin=subprocess.PIPE)
> + stdout, stderr = p.communicate(input=stdin)
> + if p.returncode == 0:
> + if stderr:
> + if verbose:
> + print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
> + print >>sys.stderr, stderr.rstrip()
> + if strip:
> + stdout = stdout.rstrip('\r\n')
> + return stdout
> + if verbose:
> + print >>sys.stderr, '`%s` returned %s' % (' '.join(args),
> p.returncode)
> + if stderr:
> + print >>sys.stderr, stderr.rstrip()
> + sys.exit(2)
> +
> +
> +def die(message):
> + print >>sys.stderr, 'error:', message
> + sys.exit(2)
> +
> +
> +if __name__ == '__main__':
> + main()
>
> Propchange: cfe/trunk/tools/clang-format/git-clang-format
>
> ------------------------------------------------------------------------------
> svn:executable = *
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130621/80a5834f/attachment.html>
More information about the cfe-commits mailing list