[llvm] r375018 - [lit] Clean up internal diff's encoding handling

Joel E. Denny via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 10:21:25 PDT 2019


Author: jdenny
Date: Wed Oct 16 10:21:24 2019
New Revision: 375018

URL: http://llvm.org/viewvc/llvm-project?rev=375018&view=rev
Log:
[lit] Clean up internal diff's encoding handling

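As suggested by rnk at D67643#1673043, instead of reading files
multiple times until an appropriate encoding is found, read them once
as binary, and then try to decode what was read: first with the
locale's preferred encoding, then as UTF-8, falling back to a binary
comparison if neither decoding succeeds.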

For Python >= 3.5, don't fail when attempting to decode the
`diff_bytes` output in order to print it: undecodable bytes are now
escaped with the `backslashreplace` error handler instead of raising.

Avoid failures for Python 2.7 used on some Windows bots by
transforming diff output with `lit.util.to_string` before writing it
to stdout.

Finally, add some tests for encoding handling.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D68664

Added:
    llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt
    llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin   (with props)
    llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16   (with props)
    llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8
Modified:
    llvm/trunk/utils/lit/lit/TestRunner.py
    llvm/trunk/utils/lit/tests/max-failures.py
    llvm/trunk/utils/lit/tests/shtest-shell.py

Modified: llvm/trunk/utils/lit/lit/TestRunner.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/TestRunner.py?rev=375018&r1=375017&r2=375018&view=diff
==============================================================================
--- llvm/trunk/utils/lit/lit/TestRunner.py (original)
+++ llvm/trunk/utils/lit/lit/TestRunner.py Wed Oct 16 10:21:24 2019
@@ -5,6 +5,7 @@ import functools
 import io
 import itertools
 import getopt
+import locale
 import os, signal, subprocess, sys
 import re
 import stat
@@ -415,32 +416,21 @@ def executeBuiltinDiff(cmd, cmd_shenv):
             return path, sorted(child_trees)
 
     def compareTwoFiles(filepaths):
-        compare_bytes = False
-        encoding = None
         filelines = []
         for file in filepaths:
-            try:
-                with open(file, 'r') as f:
-                    filelines.append(f.readlines())
-            except UnicodeDecodeError:
-                try:
-                    with io.open(file, 'r', encoding="utf-8") as f:
-                        filelines.append(f.readlines())
-                    encoding = "utf-8"
-                except:
-                    compare_bytes = True
-
-        if compare_bytes:
-            return compareTwoBinaryFiles(filepaths)
-        else:
-            return compareTwoTextFiles(filepaths, encoding)
+            with open(file, 'rb') as file_bin:
+                filelines.append(file_bin.readlines())
 
-    def compareTwoBinaryFiles(filepaths):
-        filelines = []
-        for file in filepaths:
-            with open(file, 'rb') as f:
-                filelines.append(f.readlines())
+        try:
+            return compareTwoTextFiles(filepaths, filelines,
+                                       locale.getpreferredencoding(False))
+        except UnicodeDecodeError:
+            try:
+                return compareTwoTextFiles(filepaths, filelines, "utf-8")
+            except:
+                return compareTwoBinaryFiles(filepaths, filelines)
 
+    def compareTwoBinaryFiles(filepaths, filelines):
         exitCode = 0
         if hasattr(difflib, 'diff_bytes'):
             # python 3.5 or newer
@@ -448,7 +438,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
                                        filelines[1], filepaths[0].encode(),
                                        filepaths[1].encode(),
                                        n = num_context_lines)
-            diffs = [diff.decode() for diff in diffs]
+            diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
         else:
             # python 2.7
             func = difflib.unified_diff if unified_diff else difflib.context_diff
@@ -456,19 +446,18 @@ def executeBuiltinDiff(cmd, cmd_shenv):
                          n = num_context_lines)
 
         for diff in diffs:
-            stdout.write(diff)
+            stdout.write(to_string(diff))
             exitCode = 1
         return exitCode
 
-    def compareTwoTextFiles(filepaths, encoding):
+    def compareTwoTextFiles(filepaths, filelines_bin, encoding):
         filelines = []
-        for file in filepaths:
-            if encoding is None:
-                with open(file, 'r') as f:
-                    filelines.append(f.readlines())
-            else:
-                with io.open(file, 'r', encoding=encoding) as f:
-                    filelines.append(f.readlines())
+        for lines_bin in filelines_bin:
+            lines = []
+            for line_bin in lines_bin:
+                line = line_bin.decode(encoding=encoding)
+                lines.append(line)
+            filelines.append(lines)
 
         exitCode = 0
         def compose2(f, g):
@@ -488,7 +477,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
         func = difflib.unified_diff if unified_diff else difflib.context_diff
         for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
                          n = num_context_lines):
-            stdout.write(diff)
+            stdout.write(to_string(diff))
             exitCode = 1
         return exitCode
 

Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt?rev=375018&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt (added)
+++ llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt Wed Oct 16 10:21:24 2019
@@ -0,0 +1,9 @@
+# Check that diff falls back to binary mode if it cannot decode a file.
+
+# RUN: diff -u diff-in.bin diff-in.bin
+# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
+# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
+# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
+
+# Fail so lit will print output.
+# RUN: false

Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin?rev=375018&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16?rev=375018&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8?rev=375018&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 (added)
+++ llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 Wed Oct 16 10:21:24 2019
@@ -0,0 +1,3 @@
+foo
+bar
+baz

Modified: llvm/trunk/utils/lit/tests/max-failures.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/max-failures.py?rev=375018&r1=375017&r2=375018&view=diff
==============================================================================
--- llvm/trunk/utils/lit/tests/max-failures.py (original)
+++ llvm/trunk/utils/lit/tests/max-failures.py Wed Oct 16 10:21:24 2019
@@ -8,7 +8,7 @@
 #
 # END.
 
-# CHECK: Failing Tests (30)
+# CHECK: Failing Tests (31)
 # CHECK: Failing Tests (1)
 # CHECK: Failing Tests (2)
 # CHECK: error: argument --max-failures: requires positive integer, but found '0'

Modified: llvm/trunk/utils/lit/tests/shtest-shell.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/shtest-shell.py?rev=375018&r1=375017&r2=375018&view=diff
==============================================================================
--- llvm/trunk/utils/lit/tests/shtest-shell.py (original)
+++ llvm/trunk/utils/lit/tests/shtest-shell.py Wed Oct 16 10:21:24 2019
@@ -34,6 +34,58 @@
 # CHECK: error: command failed with exit status: 127
 # CHECK: ***
 
+
+# CHECK: FAIL: shtest-shell :: diff-encodings.txt
+# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
+# CHECK-NOT: error
+
+# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^ .f.o.o.$}}
+# CHECK-NEXT: {{^-.b.a.r.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^ .b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: -foo
+# CHECK-NEXT: -bar
+# CHECK-NEXT: -baz
+# CHECK-NEXT: {{^\+.f.o.o.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^\+.b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^\-.f.o.o.$}}
+# CHECK-NEXT: {{^\-.b.a.r..}}
+# CHECK-NEXT: {{^\-.b.a.z.$}}
+# CHECK-NEXT: +foo
+# CHECK-NEXT: +bar
+# CHECK-NEXT: +baz
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "false"
+
+# CHECK: ***
+
+
 # CHECK: FAIL: shtest-shell :: diff-error-0.txt
 # CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
 # CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
@@ -308,4 +360,4 @@
 # CHECK: PASS: shtest-shell :: sequencing-0.txt
 # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
 # CHECK: PASS: shtest-shell :: valid-shell.txt
-# CHECK: Failing Tests (30)
+# CHECK: Failing Tests (31)




More information about the llvm-commits mailing list