[llvm] r374389 - [lit] Clean up internal diff's encoding handling
Joel E. Denny via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 10:39:41 PDT 2019
Author: jdenny
Date: Thu Oct 10 10:39:41 2019
New Revision: 374389
URL: http://llvm.org/viewvc/llvm-project?rev=374389&view=rev
Log:
[lit] Clean up internal diff's encoding handling
As suggested by rnk at D67643#1673043, instead of reading files
multiple times until an appropriate encoding is found, read them once
as binary, and then try to decode what was read.
For Python >= 3.5, don't fail when attempting to decode the
`diff_bytes` output in order to print it; bytes that cannot be decoded
are printed as backslash escapes instead.
Finally, add some tests for encoding handling.
Reviewed By: rnk
Differential Revision: https://reviews.llvm.org/D68664
Added:
llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt
llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin (with props)
llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 (with props)
llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8
Modified:
llvm/trunk/utils/lit/lit/builtin_commands/diff.py
llvm/trunk/utils/lit/tests/max-failures.py
llvm/trunk/utils/lit/tests/shtest-shell.py
Modified: llvm/trunk/utils/lit/lit/builtin_commands/diff.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/builtin_commands/diff.py?rev=374389&r1=374388&r2=374389&view=diff
==============================================================================
--- llvm/trunk/utils/lit/lit/builtin_commands/diff.py (original)
+++ llvm/trunk/utils/lit/lit/builtin_commands/diff.py Thu Oct 10 10:39:41 2019
@@ -1,6 +1,7 @@
import difflib
import functools
import getopt
+import locale
import os
import sys
@@ -24,37 +25,26 @@ def getDirTree(path, basedir=""):
return path, sorted(child_trees)
def compareTwoFiles(flags, filepaths):
- compare_bytes = False
- encoding = None
filelines = []
for file in filepaths:
- try:
- with open(file, 'r') as f:
- filelines.append(f.readlines())
- except UnicodeDecodeError:
- try:
- with io.open(file, 'r', encoding="utf-8") as f:
- filelines.append(f.readlines())
- encoding = "utf-8"
- except:
- compare_bytes = True
-
- if compare_bytes:
- return compareTwoBinaryFiles(flags, filepaths)
- else:
- return compareTwoTextFiles(flags, filepaths, encoding)
+ with open(file, 'rb') as file_bin:
+ filelines.append(file_bin.readlines())
-def compareTwoBinaryFiles(flags, filepaths):
- filelines = []
- for file in filepaths:
- with open(file, 'rb') as f:
- filelines.append(f.readlines())
+ try:
+ return compareTwoTextFiles(flags, filepaths, filelines,
+ locale.getpreferredencoding(False))
+ except UnicodeDecodeError:
+ try:
+ return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
+ except:
+ return compareTwoBinaryFiles(flags, filepaths, filelines)
+def compareTwoBinaryFiles(flags, filepaths, filelines):
exitCode = 0
if hasattr(difflib, 'diff_bytes'):
# python 3.5 or newer
diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
- diffs = [diff.decode() for diff in diffs]
+ diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
else:
# python 2.7
if flags.unified_diff:
@@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepat
exitCode = 1
return exitCode
-def compareTwoTextFiles(flags, filepaths, encoding):
+def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
filelines = []
- for file in filepaths:
- if encoding is None:
- with open(file, 'r') as f:
- filelines.append(f.readlines())
- else:
- with io.open(file, 'r', encoding=encoding) as f:
- filelines.append(f.readlines())
+ for lines_bin in filelines_bin:
+ lines = []
+ for line_bin in lines_bin:
+ line = line_bin.decode(encoding=encoding)
+ lines.append(line)
+ filelines.append(lines)
exitCode = 0
def compose2(f, g):
Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt?rev=374389&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt (added)
+++ llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt Thu Oct 10 10:39:41 2019
@@ -0,0 +1,9 @@
+# Check that diff falls back to binary mode if it cannot decode a file.
+
+# RUN: diff -u diff-in.bin diff-in.bin
+# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
+# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
+# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
+
+# Fail so lit will print output.
+# RUN: false
Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin?rev=374389&view=auto
==============================================================================
Binary file - no diff available.
Propchange: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16?rev=374389&view=auto
==============================================================================
Binary file - no diff available.
Propchange: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8?rev=374389&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 (added)
+++ llvm/trunk/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 Thu Oct 10 10:39:41 2019
@@ -0,0 +1,3 @@
+foo
+bar
+baz
Modified: llvm/trunk/utils/lit/tests/max-failures.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/max-failures.py?rev=374389&r1=374388&r2=374389&view=diff
==============================================================================
--- llvm/trunk/utils/lit/tests/max-failures.py (original)
+++ llvm/trunk/utils/lit/tests/max-failures.py Thu Oct 10 10:39:41 2019
@@ -8,7 +8,7 @@
#
# END.
-# CHECK: Failing Tests (27)
+# CHECK: Failing Tests (28)
# CHECK: Failing Tests (1)
# CHECK: Failing Tests (2)
# CHECK: error: Option '--max-failures' requires positive integer
Modified: llvm/trunk/utils/lit/tests/shtest-shell.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/shtest-shell.py?rev=374389&r1=374388&r2=374389&view=diff
==============================================================================
--- llvm/trunk/utils/lit/tests/shtest-shell.py (original)
+++ llvm/trunk/utils/lit/tests/shtest-shell.py Thu Oct 10 10:39:41 2019
@@ -34,6 +34,58 @@
# CHECK: error: command failed with exit status: 127
# CHECK: ***
+
+# CHECK: FAIL: shtest-shell :: diff-encodings.txt
+# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
+# CHECK-NOT: error
+
+# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^ .f.o.o.$}}
+# CHECK-NEXT: {{^-.b.a.r.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^ .b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: -foo
+# CHECK-NEXT: -bar
+# CHECK-NEXT: -baz
+# CHECK-NEXT: {{^\+.f.o.o.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^\+.b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^\-.f.o.o.$}}
+# CHECK-NEXT: {{^\-.b.a.r..}}
+# CHECK-NEXT: {{^\-.b.a.z.$}}
+# CHECK-NEXT: +foo
+# CHECK-NEXT: +bar
+# CHECK-NEXT: +baz
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "false"
+
+# CHECK: ***
+
+
# CHECK: FAIL: shtest-shell :: diff-error-1.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED ***
# CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt"
@@ -245,4 +297,4 @@
# CHECK: PASS: shtest-shell :: sequencing-0.txt
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
# CHECK: PASS: shtest-shell :: valid-shell.txt
-# CHECK: Failing Tests (27)
+# CHECK: Failing Tests (28)
More information about the llvm-commits
mailing list