r182596 - clang-format integration for git.

Daniel Jasper djasper at google.com
Tue Jul 2 06:22:13 PDT 2013


Fixed in r185423.


On Fri, Jun 21, 2013 at 11:00 PM, Nico Weber <thakis at chromium.org> wrote:

> I just had the chance to try this. One question below.
>
>
> On Thu, May 23, 2013 at 10:53 AM, Daniel Jasper <djasper at google.com>wrote:
>
>> Author: djasper
>> Date: Thu May 23 12:53:42 2013
>> New Revision: 182596
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=182596&view=rev
>> Log:
>> clang-format integration for git.
>>
>> Put this somewhere on your path and use:
>>
>> git clang-format
>>
>> Awesome work by Mark Lodato. Many thanks!
>>
>> Added:
>>     cfe/trunk/tools/clang-format/git-clang-format   (with props)
>>
>> Added: cfe/trunk/tools/clang-format/git-clang-format
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/git-clang-format?rev=182596&view=auto
>>
>> ==============================================================================
>> --- cfe/trunk/tools/clang-format/git-clang-format (added)
>> +++ cfe/trunk/tools/clang-format/git-clang-format Thu May 23 12:53:42 2013
>> @@ -0,0 +1,513 @@
>> +#!/usr/bin/python
>> +#
>> +#===- git-clang-format - ClangFormat Git Integration ---------*- python
>> -*--===#
>> +#
>> +#                     The LLVM Compiler Infrastructure
>> +#
>> +# This file is distributed under the University of Illinois Open Source
>> +# License. See LICENSE.TXT for details.
>> +#
>>
>> +#===------------------------------------------------------------------------===#
>> +
>> +r"""
>> +clang-format git integration
>> +============================
>> +
>> +This file provides a clang-format integration for git. Put it somewhere
>> in your
>> +path and ensure that it is executable. Then, "git clang-format" will
>> invoke
>> +clang-format on the changes in current files or a specific commit.
>> +
>> +For further details, run:
>> +git clang-format -h
>> +
>> +Requires Python 2.7
>> +"""
>> +
>> +import argparse
>> +import collections
>> +import contextlib
>> +import errno
>> +import os
>> +import re
>> +import subprocess
>> +import sys
>> +
>> +usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
>> +
>> +desc = '''
>> +Run clang-format on all lines that differ between the working directory
>> +and <commit>, which defaults to HEAD.  Changes are only applied to the
>> working
>> +directory.
>> +
>> +The following git-config settings set the default of the corresponding
>> option:
>> +  clangFormat.binary
>> +  clangFormat.commit
>> +  clangFormat.extension
>> +  clangFormat.style
>> +'''
>> +
>> +# Name of the temporary index file in which to save the output of
>> clang-format.
>> +# This file is created within the .git directory.
>> +temp_index_basename = 'clang-format-index'
>> +
>> +
>> +Range = collections.namedtuple('Range', 'start, count')
>> +
>> +
>> +def main():
>> +  config = load_git_config()
>> +
>> +  # In order to keep '--' yet allow options after positionals, we need to
>> +  # check for '--' ourselves.  (Setting nargs='*' throws away the '--',
>> while
>> +  # nargs=argparse.REMAINDER disallows options after positionals.)
>> +  argv = sys.argv[1:]
>> +  try:
>> +    idx = argv.index('--')
>> +  except ValueError:
>> +    dash_dash = []
>> +  else:
>> +    dash_dash = argv[idx:]
>> +    argv = argv[:idx]
>> +
>> +  default_extensions = ','.join([
>> +      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
>> +      'c', 'h',  # C
>> +      'm',  # ObjC
>> +      'mm',  # ObjC++
>> +      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
>> +      ])
>> +
>> +  p = argparse.ArgumentParser(
>> +    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
>> +    description=desc)
>> +  p.add_argument('--binary',
>> +                 default=config.get('clangformat.binary',
>> 'clang-format'),
>> +                 help='path to clang-format'),
>> +  p.add_argument('--commit',
>> +                 default=config.get('clangformat.commit', 'HEAD'),
>> +                 help='default commit to use if none is specified'),
>> +  p.add_argument('--diff', action='store_true',
>> +                 help='print a diff instead of applying the changes')
>> +  p.add_argument('--extensions',
>> +                 default=config.get('clangformat.extensions',
>> +                                    default_extensions),
>> +                 help=('comma-separated list of file extensions to
>> format, '
>> +                       'excluding the period and case-insensitive')),
>> +  p.add_argument('-f', '--force', action='store_true',
>> +                 help='allow changes to unstaged files')
>> +  p.add_argument('-p', '--patch', action='store_true',
>> +                 help='select hunks interactively')
>> +  p.add_argument('-q', '--quiet', action='count', default=0,
>> +                 help='print less information')
>> +  p.add_argument('--style',
>> +                 default=config.get('clangformat.style', None),
>> +                 help='passed to clang-format'),
>> +  p.add_argument('-v', '--verbose', action='count', default=0,
>> +                 help='print extra information')
>> +  # We gather all the remaining positional arguments into 'args' since
>> we need
>> +  # to use some heuristics to determine whether or not <commit> was
>> present.
>> +  # However, to print pretty messages, we make use of metavar and help.
>> +  p.add_argument('args', nargs='*', metavar='<commit>',
>> +                 help='revision from which to compute the diff')
>> +  p.add_argument('ignored', nargs='*', metavar='<file>...',
>> +                 help='if specified, only consider differences in these
>> files')
>> +  opts = p.parse_args(argv)
>> +
>> +  opts.verbose -= opts.quiet
>> +  del opts.quiet
>> +
>> +  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
>> +  changed_lines = compute_diff_and_extract_lines(commit, files)
>> +  if opts.verbose >= 1:
>> +    ignored_files = set(changed_lines)
>> +  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
>> +  if opts.verbose >= 1:
>> +    ignored_files.difference_update(changed_lines)
>> +    if ignored_files:
>> +      print 'Ignoring changes in the following files (wrong extension):'
>> +      for filename in ignored_files:
>> +        print '   ', filename
>> +    if changed_lines:
>> +      print 'Running clang-format on the following files:'
>> +      for filename in changed_lines:
>> +        print '   ', filename
>> +  if not changed_lines:
>> +    print 'no modified files to format'
>> +    return
>> +  # The computed diff outputs absolute paths, so we must cd before
>> accessing
>> +  # those files.
>> +  cd_to_toplevel()
>> +  changed_bytes = lines_to_bytes(changed_lines)
>> +  old_tree = create_tree_from_workdir(changed_bytes)
>> +  new_tree = run_clang_format_and_save_to_tree(changed_bytes,
>> +                                               binary=opts.binary,
>> +                                               style=opts.style)
>> +  if opts.verbose >= 1:
>> +    print 'old tree:', old_tree
>> +    print 'new tree:', new_tree
>> +  if old_tree == new_tree:
>> +    if opts.verbose >= 0:
>> +      print 'clang-format did not modify any files'
>> +  elif opts.diff:
>> +    print_diff(old_tree, new_tree)
>> +  else:
>> +    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
>> +                                  patch_mode=opts.patch)
>> +    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
>> +      print 'changed files:'
>> +      for filename in changed_files:
>> +        print '   ', filename
>> +
>> +
>> +def load_git_config(non_string_options=None):
>> +  """Return the git configuration as a dictionary.
>> +
>> +  All options are assumed to be strings unless in `non_string_options`,
>> which
>> +  is a dictionary mapping option name (in lower case) to either "--bool"
>> or
>> +  "--int"."""
>> +  if non_string_options is None:
>> +    non_string_options = {}
>> +  out = {}
>> +  for entry in run('git', 'config', '--list', '--null').split('\0'):
>> +    if entry:
>> +      name, value = entry.split('\n', 1)
>> +      if name in non_string_options:
>> +        value = run('git', 'config', non_string_options[name], name)
>> +      out[name] = value
>> +  return out
>> +
>> +
>> +def interpret_args(args, dash_dash, default_commit):
>> +  """Interpret `args` as "[commit] [--] [files...]" and return (commit,
>> files).
>> +
>> +  It is assumed that "--" and everything that follows has been removed
>> from
>> +  args and placed in `dash_dash`.
>> +
>> +  If "--" is present (i.e., `dash_dash` is non-empty), the argument to
>> its
>> +  left (if present) is taken as commit.  Otherwise, the first argument is
>> +  checked if it is a commit or a file.  If commit is not given,
>> +  `default_commit` is used."""
>> +  if dash_dash:
>> +    if len(args) == 0:
>> +      commit = default_commit
>> +    elif len(args) > 1:
>> +      die('at most one commit allowed; %d given' % len(args))
>> +    else:
>> +      commit = args[0]
>> +    object_type = get_object_type(commit)
>> +    if object_type not in ('commit', 'tag'):
>> +      if object_type is None:
>> +        die("'%s' is not a commit" % commit)
>> +      else:
>> +        die("'%s' is a %s, but a commit was expected" % (commit,
>> object_type))
>> +    files = dash_dash[1:]
>> +  elif args:
>> +    if disambiguate_revision(args[0]):
>> +      commit = args[0]
>> +      files = args[1:]
>> +    else:
>> +      commit = default_commit
>> +      files = args
>> +  else:
>> +    commit = default_commit
>> +    files = []
>> +  return commit, files
>> +
>> +
>> +def disambiguate_revision(value):
>> +  """Returns True if `value` is a revision, False if it is a file, or
>> dies."""
>> +  # If `value` is ambiguous (neither a commit nor a file), the following
>> +  # command will die with an appropriate error message.
>> +  run('git', 'rev-parse', value, verbose=False)
>> +  object_type = get_object_type(value)
>> +  if object_type is None:
>> +    return False
>> +  if object_type in ('commit', 'tag'):
>> +    return True
>> +  die('`%s` is a %s, but a commit or filename was expected' %
>> +      (value, object_type))
>> +
>> +
>> +def get_object_type(value):
>> +  """Returns a string description of an object's type, or None if it is
>> not
>> +  a valid git object."""
>> +  cmd = ['git', 'cat-file', '-t', value]
>> +  p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
>> stderr=subprocess.PIPE)
>> +  stdout, stderr = p.communicate()
>> +  if p.returncode != 0:
>> +    return None
>> +  return stdout.strip()
>> +
>> +
>> +def compute_diff_and_extract_lines(commit, files):
>> +  """Calls compute_diff() followed by extract_lines()."""
>> +  diff_process = compute_diff(commit, files)
>> +  changed_lines = extract_lines(diff_process.stdout)
>> +  diff_process.stdout.close()
>> +  diff_process.wait()
>> +  if diff_process.returncode != 0:
>> +    # Assume error was already printed to stderr.
>> +    sys.exit(2)
>> +  return changed_lines
>> +
>> +
>> +def compute_diff(commit, files):
>> +  """Return a subprocess object producing the diff from `commit`.
>> +
>> +  The return value's `stdout` file object will produce a patch with the
>> +  differences between the working directory and `commit`, filtered on
>> `files`
>> +  (if non-empty).  Zero context lines are used in the patch."""
>> +  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
>> +  cmd.extend(files)
>> +  p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
>> stdout=subprocess.PIPE)
>> +  p.stdin.close()
>> +  return p
>> +
>> +
>> +def extract_lines(patch_file):
>> +  """Extract the changed lines in `patch_file`.
>> +
>> +  The input must have been produced with ``-U0``, meaning unidiff format
>> with
>> +  zero lines of context.  The return value is a dict mapping filename to
>> a
>> +  list of line `Range`s."""
>> +  matches = {}
>> +  for line in patch_file:
>> +    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
>> +    if match:
>> +      filename = match.group(1).rstrip('\r\n')
>> +    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
>> +    if match:
>> +      start_line = int(match.group(1))
>> +      line_count = 1
>> +      if match.group(3):
>> +        line_count = int(match.group(3))
>> +      if line_count > 0:
>> +        matches.setdefault(filename, []).append(Range(start_line,
>> line_count))
>> +  return matches
>> +
>> +
>> +def filter_by_extension(dictionary, allowed_extensions):
>> +  """Delete every key in `dictionary` that doesn't have an allowed
>> extension.
>> +
>> +  `allowed_extensions` must be a collection of lowercase file extensions,
>> +  excluding the period."""
>> +  allowed_extensions = frozenset(allowed_extensions)
>> +  for filename in dictionary.keys():
>> +    base_ext = filename.rsplit('.', 1)
>> +    if len(base_ext) == 1 or base_ext[1].lower() not in
>> allowed_extensions:
>> +      del dictionary[filename]
>> +
>> +
>> +def cd_to_toplevel():
>> +  """Change to the top level of the git repository."""
>> +  toplevel = run('git', 'rev-parse', '--show-toplevel')
>> +  os.chdir(toplevel)
>> +
>> +
>> +def lines_to_bytes(changed_lines):
>> +  """Convert the mapping of changed line ranges to changed byte ranges.
>> +
>> +  This function opens each file to compute the byte ranges."""
>> +  changed_bytes = {}
>> +  for filename, line_ranges in changed_lines.iteritems():
>> +    with open(filename) as f:
>> +      changed_bytes[filename] = lines_to_bytes_single_file(f,
>> line_ranges)
>> +  return changed_bytes
>> +
>> +
>> +def lines_to_bytes_single_file(file, line_ranges):
>> +  byte_ranges = []
>> +  line_ranges_iter = iter(line_ranges + [None])
>> +  r = next(line_ranges_iter)
>> +  linenum = 1
>> +  byte_idx = 0
>> +  byte_start = None
>> +  byte_count = None
>> +  for line in file:
>> +    if r is None:
>> +      break
>> +    if linenum == r.start:
>> +      byte_start = byte_idx
>> +    if linenum == r.start + r.count:
>> +      byte_ranges.append(Range(byte_start, byte_idx - byte_start))
>>
>
> ^ Is this correct? With this, the --length that's passed to clang-format
> will include the trailing '\n', which apparently causes clang-format to
> format the next line as well. I'm not sure if that's a bug in this script,
> in clang-format, or both.
>
> Here's an example where this caused formatting on a line that wasn't
> touched by a patch:
> https://codereview.chromium.org/16917011/diff/1/chrome/browser/search_engines/template_url_prepopulate_data.cc#newcode622
>
>
>> +      r = next(line_ranges_iter)
>> +    linenum += 1
>> +    byte_idx += len(line)
>> +  if r is not None:
>> +    # FIXME: Detect and warn if line ranges go past the end of file?
>> +    byte_ranges.append(Range(byte_start, byte_idx - byte_start))
>> +  return byte_ranges
>> +
>> +
>> +def create_tree_from_workdir(filenames):
>> +  """Create a new git tree with the given files from the working
>> directory.
>> +
>> +  Returns the object ID (SHA-1) of the created tree."""
>> +  return create_tree(filenames, '--stdin')
>> +
>> +
>> +def run_clang_format_and_save_to_tree(changed_bytes,
>> binary='clang-format',
>> +                                      style=None):
>> +  """Run clang-format on each file and save the result to a git tree.
>> +
>> +  Returns the object ID (SHA-1) of the created tree."""
>> +  def index_info_generator():
>> +    for filename, byte_ranges in changed_bytes.iteritems():
>> +      mode = oct(os.stat(filename).st_mode)
>> +      blob_id = clang_format_to_blob(filename, byte_ranges,
>> binary=binary,
>> +                                     style=style)
>> +      yield '%s %s\t%s' % (mode, blob_id, filename)
>> +  return create_tree(index_info_generator(), '--index-info')
>> +
>> +
>> +def create_tree(input_lines, mode):
>> +  """Create a tree object from the given input.
>> +
>> +  If mode is '--stdin', it must be a list of filenames.  If mode is
>> +  '--index-info' it must be a list of values suitable for "git
>> update-index
>> +  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any
>> other mode
>> +  is invalid."""
>> +  assert mode in ('--stdin', '--index-info')
>> +  cmd = ['git', 'update-index', '--add', '-z', mode]
>> +  with temporary_index_file():
>> +    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
>> +    for line in input_lines:
>> +      p.stdin.write('%s\0' % line)
>> +    p.stdin.close()
>> +    if p.wait() != 0:
>> +      die('`%s` failed' % ' '.join(cmd))
>> +    tree_id = run('git', 'write-tree')
>> +    return tree_id
>> +
>> +
>> +def clang_format_to_blob(filename, byte_ranges, binary='clang-format',
>> +                         style=None):
>> +  """Run clang-format on the given file and save the result to a git
>> blob.
>> +
>> +  Returns the object ID (SHA-1) of the created blob."""
>> +  clang_format_cmd = [binary, filename]
>> +  if style:
>> +    clang_format_cmd.extend(['-style='+style])
>> +  for offset, length in byte_ranges:
>> +    clang_format_cmd.extend(['-offset='+str(offset),
>> '-length='+str(length)])
>> +  try:
>> +    clang_format = subprocess.Popen(clang_format_cmd,
>> stdin=subprocess.PIPE,
>> +                                    stdout=subprocess.PIPE)
>> +  except OSError as e:
>> +    if e.errno == errno.ENOENT:
>> +      die('cannot find executable "%s"' % binary)
>> +    else:
>> +      raise
>> +  clang_format.stdin.close()
>> +  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename,
>> '--stdin']
>> +  hash_object = subprocess.Popen(hash_object_cmd,
>> stdin=clang_format.stdout,
>> +                                 stdout=subprocess.PIPE)
>> +  clang_format.stdout.close()
>> +  stdout = hash_object.communicate()[0]
>> +  if hash_object.returncode != 0:
>> +    die('`%s` failed' % ' '.join(hash_object_cmd))
>> +  if clang_format.wait() != 0:
>> +    die('`%s` failed' % ' '.join(clang_format_cmd))
>> +  return stdout.rstrip('\r\n')
>> +
>> +
>> + at contextlib.contextmanager
>> +def temporary_index_file(tree=None):
>> +  """Context manager for setting GIT_INDEX_FILE to a temporary file and
>> deleting
>> +  the file afterward."""
>> +  index_path = create_temporary_index(tree)
>> +  old_index_path = os.environ.get('GIT_INDEX_FILE')
>> +  os.environ['GIT_INDEX_FILE'] = index_path
>> +  try:
>> +    yield
>> +  finally:
>> +    if old_index_path is None:
>> +      del os.environ['GIT_INDEX_FILE']
>> +    else:
>> +      os.environ['GIT_INDEX_FILE'] = old_index_path
>> +    os.remove(index_path)
>> +
>> +
>> +def create_temporary_index(tree=None):
>> +  """Create a temporary index file and return the created file's path.
>> +
>> +  If `tree` is not None, use that as the tree to read in.  Otherwise, an
>> +  empty index is created."""
>> +  gitdir = run('git', 'rev-parse', '--git-dir')
>> +  path = os.path.join(gitdir, temp_index_basename)
>> +  if tree is None:
>> +    tree = '--empty'
>> +  run('git', 'read-tree', '--index-output='+path, tree)
>> +  return path
>> +
>> +
>> +def print_diff(old_tree, new_tree):
>> +  """Print the diff between the two trees to stdout."""
>> +  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
>> output
>> +  # is expected to be viewed by the user, and only the former does nice
>> things
>> +  # like color and pagination.
>> +  subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
>> +
>> +
>> +def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
>> +  """Apply the changes in `new_tree` to the working directory.
>> +
>> +  Bails if there are local changes in those files and not `force`.  If
>> +  `patch_mode`, runs `git checkout --patch` to select hunks
>> interactively."""
>> +  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only',
>> old_tree,
>> +                      new_tree).rstrip('\0').split('\0')
>> +  if not force:
>> +    unstaged_files = run('git', 'diff-files', '--name-status',
>> *changed_files)
>> +    if unstaged_files:
>> +      print >>sys.stderr, ('The following files would be modified but '
>> +                           'have unstaged changes:')
>> +      print >>sys.stderr, unstaged_files
>> +      print >>sys.stderr, 'Please commit, stage, or stash them first.'
>> +      sys.exit(2)
>> +  if patch_mode:
>> +    # In patch mode, we could just as well create an index from the new
>> tree
>> +    # and checkout from that, but then the user will be presented with a
>> +    # message saying "Discard ... from worktree".  Instead, we use the
>> old
>> +    # tree as the index and checkout from new_tree, which gives the
>> slightly
>> +    # better message, "Apply ... to index and worktree".  This is not
>> quite
>> +    # right, since it won't be applied to the user's index, but oh well.
>> +    with temporary_index_file(old_tree):
>> +      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
>> +    index_tree = old_tree
>> +  else:
>> +    with temporary_index_file(new_tree):
>> +      run('git', 'checkout-index', '-a', '-f')
>> +  return changed_files
>> +
>> +
>> +def run(*args, **kwargs):
>> +  stdin = kwargs.pop('stdin', '')
>> +  verbose = kwargs.pop('verbose', True)
>> +  strip = kwargs.pop('strip', True)
>> +  for name in kwargs:
>> +    raise TypeError("run() got an unexpected keyword argument '%s'" %
>> name)
>> +  p = subprocess.Popen(args, stdout=subprocess.PIPE,
>> stderr=subprocess.PIPE,
>> +                       stdin=subprocess.PIPE)
>> +  stdout, stderr = p.communicate(input=stdin)
>> +  if p.returncode == 0:
>> +    if stderr:
>> +      if verbose:
>> +        print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
>> +      print >>sys.stderr, stderr.rstrip()
>> +    if strip:
>> +      stdout = stdout.rstrip('\r\n')
>> +    return stdout
>> +  if verbose:
>> +    print >>sys.stderr, '`%s` returned %s' % (' '.join(args),
>> p.returncode)
>> +  if stderr:
>> +    print >>sys.stderr, stderr.rstrip()
>> +  sys.exit(2)
>> +
>> +
>> +def die(message):
>> +  print >>sys.stderr, 'error:', message
>> +  sys.exit(2)
>> +
>> +
>> +if __name__ == '__main__':
>> +  main()
>>
>> Propchange: cfe/trunk/tools/clang-format/git-clang-format
>>
>> ------------------------------------------------------------------------------
>>     svn:executable = *
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130702/010153ff/attachment.html>


More information about the cfe-commits mailing list