r292593 - Use UTF-8 for all communication with clang-format

Philipp Stephani via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 20 01:37:50 PST 2017


Author: phst
Date: Fri Jan 20 03:37:50 2017
New Revision: 292593

URL: http://llvm.org/viewvc/llvm-project?rev=292593&view=rev
Log:
Use UTF-8 for all communication with clang-format

Summary: Instead of picking the buffer file coding system, always use utf-8-unix for communicating with clang-format.  Ignoring the buffer's coding system is fine because clang-format never actually reads the file to be formatted; it only reads standard input.  The new approach is a bit simpler (the process coding system is now a constant) and potentially faster, as utf-8-unix is Emacs's internal coding system.  Also add an end-to-end test that actually invokes clang-format.

Reviewers: klimek

Reviewed By: klimek

Differential Revision: https://reviews.llvm.org/D28904

Modified:
    cfe/trunk/tools/clang-format/clang-format-test.el
    cfe/trunk/tools/clang-format/clang-format.el

Modified: cfe/trunk/tools/clang-format/clang-format-test.el
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/clang-format-test.el?rev=292593&r1=292592&r2=292593&view=diff
==============================================================================
--- cfe/trunk/tools/clang-format/clang-format-test.el (original)
+++ cfe/trunk/tools/clang-format/clang-format-test.el Fri Jan 20 03:37:50 2017
@@ -31,8 +31,8 @@
                    (with-current-buffer stdout
                      (insert "<?xml version='1.0'?>
 <replacements xml:space='preserve' incomplete_format='false'>
-<replacement offset='7' length='0'> </replacement>
-<replacement offset='14' length='0'> </replacement>
+<replacement offset='4' length='0'> </replacement>
+<replacement offset='10' length='0'> </replacement>
 </replacements>
 "))
                    0)))))
@@ -58,15 +58,14 @@
        (should (equal args
                       '("-output-replacements-xml" "-assume-filename" "foo.cpp"
                         "-style" "file"
-                        ;; Length of the UTF-8 byte-order mark.
-                        "-offset" "3"
+                        ;; Beginning of buffer, no byte-order mark.
+                        "-offset" "0"
                         ;; We have two lines with 2×2 bytes for the umlauts,
-                        ;; 2 bytes for the line ending, and 3 bytes for the
+                        ;; 1 byte for the line ending, and 3 bytes for the
                         ;; other ASCII characters each.
-                        "-length" "18"
-                        ;; Length of a single line (without line ending) plus
-                        ;; BOM.
-                        "-cursor" "10")))))))
+                        "-length" "16"
+                        ;; Length of a single line (without line ending).
+                        "-cursor" "7")))))))
 
 (ert-deftest clang-format-buffer--process-encoding ()
   "Tests that text is sent to the clang-format process in the
@@ -105,6 +104,23 @@ right encoding."
         (clang-format-buffer))
       (should (equal (buffer-string) "ä\n"))
       (should (eobp)))
-    (should (equal call-process-inputs '("ef bb bf c3 a4 0d 0a ")))))
+    (should (equal call-process-inputs '("c3 a4 0a ")))))
+
+(ert-deftest clang-format-buffer--end-to-end ()
+  "End-to-end test for ‘clang-format-buffer’.
+Actually calls the clang-format binary."
+  (skip-unless (file-executable-p clang-format-executable))
+  (with-temp-buffer
+    (let ((buffer-file-name "foo.cpp")
+          (buffer-file-coding-system 'utf-8-with-signature-dos)
+          (default-process-coding-system 'latin-1-unix))
+      (insert "ä =ö;\nü= ß;\n")
+      (goto-char (point-min))
+      (end-of-line)
+      (clang-format-buffer))
+    (should (equal (buffer-string) "ä = ö;\nü = ß;\n"))
+    (should (eolp))
+    (should (equal (buffer-substring (point) (point-max))
+                   "\nü = ß;\n"))))
 
 ;;; clang-format-test.el ends here

Modified: cfe/trunk/tools/clang-format/clang-format.el
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/clang-format.el?rev=292593&r1=292592&r2=292593&view=diff
==============================================================================
--- cfe/trunk/tools/clang-format/clang-format.el (original)
+++ cfe/trunk/tools/clang-format/clang-format.el Fri Jan 20 03:37:50 2017
@@ -95,9 +95,10 @@ of the buffer."
 (defun clang-format--replace (offset length &optional text)
   "Replace the region defined by OFFSET and LENGTH with TEXT.
 OFFSET and LENGTH are measured in bytes, not characters.  OFFSET
-is a zero-based file offset."
-  (let ((start (clang-format--filepos-to-bufferpos offset 'exact))
-        (end (clang-format--filepos-to-bufferpos (+ offset length) 'exact)))
+is a zero-based file offset, assuming ‘utf-8-unix’ coding."
+  (let ((start (clang-format--filepos-to-bufferpos offset 'exact 'utf-8-unix))
+        (end (clang-format--filepos-to-bufferpos (+ offset length) 'exact
+                                                 'utf-8-unix)))
     (goto-char start)
     (delete-region start end)
     (when text
@@ -130,15 +131,18 @@ is no active region.  If no style is giv
   (unless style
     (setq style clang-format-style))
 
-  (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate))
-        (file-end (clang-format--bufferpos-to-filepos end 'approximate))
-        (cursor (clang-format--bufferpos-to-filepos (point) 'exact))
+  (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate
+                                                        'utf-8-unix))
+        (file-end (clang-format--bufferpos-to-filepos end 'approximate
+                                                      'utf-8-unix))
+        (cursor (clang-format--bufferpos-to-filepos (point) 'exact 'utf-8-unix))
         (temp-buffer (generate-new-buffer " *clang-format-temp*"))
         (temp-file (make-temp-file "clang-format"))
-        (default-process-coding-system
-          ;; Output is XML, which is always UTF-8.  Input encoding should match
-          ;; the file encoding, otherwise the offsets calculated above are off.
-          (cons 'utf-8-unix buffer-file-coding-system)))
+        ;; Output is XML, which is always UTF-8.  Input encoding should match
+        ;; the encoding used to convert between buffer and file positions,
+        ;; otherwise the offsets calculated above are off.  For simplicity, we
+        ;; always use ‘utf-8-unix’ and ignore the buffer coding system.
+        (default-process-coding-system '(utf-8-unix . utf-8-unix)))
     (unwind-protect
         (let ((status (call-process-region
                        nil nil clang-format-executable
@@ -168,7 +172,8 @@ is no active region.  If no style is giv
               (dolist (rpl replacements)
                 (apply #'clang-format--replace rpl)))
             (when cursor
-              (goto-char (clang-format--filepos-to-bufferpos cursor 'exact)))
+              (goto-char (clang-format--filepos-to-bufferpos cursor 'exact
+                                                             'utf-8-unix)))
             (if incomplete-format
                 (message "(clang-format: incomplete (syntax errors)%s)" stderr)
               (message "(clang-format: success%s)" stderr))))




More information about the cfe-commits mailing list