[PATCH] D43165: [lit] Fix problem in how Python versions open files with different encodings

Aaron Smith via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 2 12:21:07 PDT 2018


asmith updated this revision to Diff 140661.
asmith edited the summary of this revision.
asmith added a comment.

This is changed based on the reviewers' suggestions. First, open the file as usual; on failure, reopen it as UTF-8; and if that also fails, reopen it as binary. Verified on Windows and Linux.


https://reviews.llvm.org/D43165

Files:
  lit/lit/TestRunner.py


Index: lit/lit/TestRunner.py
===================================================================
--- lit/lit/TestRunner.py
+++ lit/lit/TestRunner.py
@@ -2,6 +2,7 @@
 import difflib
 import errno
 import functools
+import io
 import itertools
 import getopt
 import os, signal, subprocess, sys
@@ -385,9 +386,51 @@
             return path, sorted(child_trees)
 
     def compareTwoFiles(filepaths):
+        compare_bytes = False
+        encoding = None
         filelines = []
         for file in filepaths:
-            with open(file, 'r') as f:
+            try:
+                with open(file, 'r') as f:
+                    filelines.append(f.readlines())
+            except UnicodeDecodeError:
+                try:
+                    with open(file, 'r', encoding="utf-8") as f:
+                        filelines.append(f.readlines())
+                    encoding = "utf-8"
+                except:
+                    compare_bytes = True
+
+        if compare_bytes:
+            return compareTwoBinaryFiles(filepaths)
+        else:
+            return compareTwoTextFiles(filepaths, encoding)
+
+    def compareTwoBinaryFiles(filepaths):
+        filelines = []
+        for file in filepaths:
+            with open(file, 'rb') as f:
+                filelines.append(f.readlines())
+
+        exitCode = 0 
+        if hasattr(difflib, 'diff_bytes'):
+            # python 3.5 or newer
+            diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
+            diffs = [diff.decode() for diff in diffs]
+        else:
+            # python 2.7
+            func = difflib.unified_diff if unified_diff else difflib.context_diff
+            diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])
+
+        for diff in diffs:
+            stdout.write(diff)
+            exitCode = 1
+        return exitCode
+
+    def compareTwoTextFiles(filepaths, encoding):
+        filelines = []
+        for file in filepaths:
+            with open(file, 'r', encoding=encoding) as f:
                 filelines.append(f.readlines())
 
         exitCode = 0 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D43165.140661.patch
Type: text/x-patch
Size: 2163 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180402/3f9ae41c/attachment.bin>


More information about the llvm-commits mailing list