[clang] d14e7ee - [clang-format] Add -j to clang-format-diff to speed up formatting

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 2 11:59:44 PST 2023


Author: Sean Maher
Date: 2023-03-02T11:59:11-08:00
New Revision: d14e7ee3d17cfa60d44256d742c10e9949a6048f

URL: https://github.com/llvm/llvm-project/commit/d14e7ee3d17cfa60d44256d742c10e9949a6048f
DIFF: https://github.com/llvm/llvm-project/commit/d14e7ee3d17cfa60d44256d742c10e9949a6048f.diff

LOG: [clang-format] Add -j to clang-format-diff to speed up formatting

This patch changes the implementation of clang-format-diff.py to
start up many clang-format processes in parallel in order to speed
up clang-format-diff.py by several orders of magnitude on large
patches.

Differential Revision: https://reviews.llvm.org/D141230

Added: 
    

Modified: 
    clang/tools/clang-format/clang-format-diff.py

Removed: 
    


################################################################################
diff --git a/clang/tools/clang-format/clang-format-diff.py b/clang/tools/clang-format/clang-format-diff.py
index 1dcc8689d5fef..ce971e414e3d4 100755
--- a/clang/tools/clang-format/clang-format-diff.py
+++ b/clang/tools/clang-format/clang-format-diff.py
@@ -34,6 +34,21 @@
 else:
     from io import BytesIO as StringIO
 
+def process_subprocess_result(proc, args):
+  stdout, stderr = proc.communicate()
+  if proc.returncode != 0:
+    sys.exit(proc.returncode)
+  if not args.i:
+    with open(filename) as f:
+      code = f.readlines()
+    formatted_code = StringIO(stdout).readlines()
+    diff = difflib.unified_diff(code, formatted_code,
+                                filename, filename,
+                                '(before formatting)',
+                                '(after formatting)')
+    diff_string = ''.join(diff)
+    if len(diff_string) > 0:
+      sys.stdout.write(diff_string)
 
 def main():
   parser = argparse.ArgumentParser(description=__doc__,
@@ -65,6 +80,9 @@ def main():
                       'file to use.')
   parser.add_argument('-binary', default='clang-format',
                       help='location of binary to use for clang-format')
+  parser.add_argument('-j', default=1, type=int, metavar='N',
+                      help='number of concurrent clang-format processes to spawn in '
+                      'parallel')
   args = parser.parse_args()
 
   # Extract changed lines for each file.
@@ -106,46 +124,54 @@ def main():
           ['-lines', str(start_line) + ':' + str(end_line)])
 
   # Reformat files containing changes in place.
-  for filename, lines in lines_by_file.items():
-    if args.i and args.verbose:
-      print('Formatting {}'.format(filename))
-    command = [args.binary, filename]
-    if args.i:
-      command.append('-i')
-    if args.sort_includes:
-      command.append('-sort-includes')
-    command.extend(lines)
-    if args.style:
-      command.extend(['-style', args.style])
-    if args.fallback_style:
-      command.extend(['-fallback-style', args.fallback_style])
-
-    try:
-      p = subprocess.Popen(command,
-                           stdout=subprocess.PIPE,
-                           stderr=None,
-                           stdin=subprocess.PIPE,
-                           universal_newlines=True)
-    except OSError as e:
-      # Give the user more context when clang-format isn't
-      # found/isn't executable, etc.
-      raise RuntimeError(
-        'Failed to run "%s" - %s"' % (" ".join(command), e.strerror))
-
-    stdout, stderr = p.communicate()
-    if p.returncode != 0:
-      sys.exit(p.returncode)
-
-    if not args.i:
-      with open(filename) as f:
-        code = f.readlines()
-      formatted_code = StringIO(stdout).readlines()
-      diff = difflib.unified_diff(code, formatted_code,
-                                  filename, filename,
-                                  '(before formatting)', '(after formatting)')
-      diff_string = ''.join(diff)
-      if len(diff_string) > 0:
-        sys.stdout.write(diff_string)
+  lbf = list(lines_by_file.items())
+  procs = [None for i in range(args.j)]
+  while lbf:
+    spawned_one = False
+    for i, proc in enumerate(procs):
+      if not lbf:
+        break
+      if proc is not None and proc.poll() is not None:
+        process_subprocess_result(proc, args)
+        # Set to None to flag the slot as free to start a new process
+        procs[i] = None
+        proc = None
+      if proc is None:
+        filename, lines = lbf.pop()
+        spawned_one = True
+        if args.i and args.verbose:
+          print('Formatting {}'.format(filename))
+        command = [args.binary, filename]
+        if args.i:
+          command.append('-i')
+        if args.sort_includes:
+          command.append('-sort-includes')
+        command.extend(lines)
+        if args.style:
+          command.extend(['-style', args.style])
+        if args.fallback_style:
+          command.extend(['-fallback-style', args.fallback_style])
+        try:
+          procs[i] = subprocess.Popen(command,
+                                      stdout=subprocess.PIPE,
+                                      stderr=None,
+                                      stdin=subprocess.PIPE,
+                                      universal_newlines=True)
+        except OSError as e:
+          # Give the user more context when clang-format isn't
+          # found/isn't executable, etc.
+          raise RuntimeError(
+              'Failed to run "%s" - %s"' % (" ".join(command), e.strerror))
+    # If we didn't spawn a single process after iterating through the whole
+    # list, wait on one of them to finish until we iterate through again, to
+    # prevent spinning in the case where we have a small number of jobs.
+    if not spawned_one:
+      procs[0].wait()
+  # Be sure not to leave any stray processes when exiting.
+  for proc in procs:
+    if proc:
+      proc.wait()
+      process_subprocess_result(proc, args)
 
 if __name__ == '__main__':
   main()


        


More information about the cfe-commits mailing list