[libcxx-commits] [libcxx] [llvm] [LIT] remove `to_unicode`, `to_string`, and `to_bytes` helpers (PR #165950)
Tomohiro Kashiwada via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Nov 4 04:59:21 PST 2025
https://github.com/kikairoya updated https://github.com/llvm/llvm-project/pull/165950
>From 6252ecd8ac0e448d55b39d8c2b70bb033eefc753 Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Fri, 17 Oct 2025 20:41:22 +0900
Subject: [PATCH 1/8] [LIT] replace `lit.util.mkdir` with `pathlib.Path.mkdir`
---
llvm/utils/lit/lit/TestRunner.py | 15 ++++---
llvm/utils/lit/lit/util.py | 40 +++----------------
.../shtest-glob/example_dir1.input/empty | 0
.../shtest-glob/example_dir2.input/empty | 0
.../tests/Inputs/shtest-glob/glob-mkdir.txt | 5 +++
llvm/utils/lit/tests/shtest-glob.py | 9 +++--
6 files changed, 22 insertions(+), 47 deletions(-)
create mode 100644 llvm/utils/lit/tests/Inputs/shtest-glob/example_dir1.input/empty
create mode 100644 llvm/utils/lit/tests/Inputs/shtest-glob/example_dir2.input/empty
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index a7e2705f609af..f1346c257ec75 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -462,16 +462,15 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
stderr = StringIO()
exitCode = 0
for dir in args:
- cwd = cmd_shenv.cwd
- dir = to_unicode(dir) if kIsWindows else to_bytes(dir)
- cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
- if not os.path.isabs(dir):
- dir = lit.util.abs_path_preserve_drive(os.path.join(cwd, dir))
+ dir = pathlib.Path(dir)
+ cwd = pathlib.Path(to_unicode(cmd_shenv.cwd))
+ if not dir.is_absolute():
+ dir = lit.util.abs_path_preserve_drive(cwd / dir)
if parent:
- lit.util.mkdir_p(dir)
+ dir.mkdir(parents=True, exist_ok=True)
else:
try:
- lit.util.mkdir(dir)
+ dir.mkdir(exist_ok=True)
except OSError as err:
stderr.write("Error: 'mkdir' command failed, %s\n" % str(err))
exitCode = 1
@@ -2395,7 +2394,7 @@ def runOnce(
return out, err, exitCode, timeoutInfo, status
# Create the output directory if it does not already exist.
- lit.util.mkdir_p(os.path.dirname(tmpBase))
+ pathlib.Path(tmpBase).parent.mkdir(parents=True, exist_ok=True)
# Re-run failed tests up to test.allowed_retries times.
execdir = os.path.dirname(test.getExecPath())
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index ce4c3c2df3436..e4e031b3e0898 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -5,6 +5,7 @@
import math
import numbers
import os
+import pathlib
import platform
import re
import signal
@@ -131,48 +132,17 @@ def abs_path_preserve_drive(path):
# Since Python 3.8, os.path.realpath resolves sustitute drives,
# so we should not use it. In Python 3.7, os.path.realpath
# was implemented as os.path.abspath.
+ if isinstance(path, pathlib.Path):
+ return path.absolute()
return os.path.abspath(path)
else:
# On UNIX, the current directory always has symbolic links resolved,
# so any program accepting relative paths cannot preserve symbolic
# links in paths and we should always use os.path.realpath.
+ if isinstance(path, pathlib.Path):
+ return path.resolve()
return os.path.realpath(path)
-def mkdir(path):
- try:
- if platform.system() == "Windows":
- from ctypes import windll
- from ctypes import GetLastError, WinError
-
- path = os.path.abspath(path)
- # Make sure that the path uses backslashes here, in case
- # python would have happened to use forward slashes, as the
- # NT path format only supports backslashes.
- path = path.replace("/", "\\")
- NTPath = to_unicode(r"\\?\%s" % path)
- if not windll.kernel32.CreateDirectoryW(NTPath, None):
- raise WinError(GetLastError())
- else:
- os.mkdir(path)
- except OSError:
- e = sys.exc_info()[1]
- # ignore EEXIST, which may occur during a race condition
- if e.errno != errno.EEXIST:
- raise
-
-
-def mkdir_p(path):
- """mkdir_p(path) - Make the "path" directory, if it does not exist; this
- will also make directories for any missing parent directories."""
- if not path or os.path.exists(path):
- return
-
- parent = os.path.dirname(path)
- if parent != path:
- mkdir_p(parent)
-
- mkdir(path)
-
def listdir_files(dirname, suffixes=None, exclude_filenames=None, prefixes=None):
"""Yields files in a directory.
diff --git a/llvm/utils/lit/tests/Inputs/shtest-glob/example_dir1.input/empty b/llvm/utils/lit/tests/Inputs/shtest-glob/example_dir1.input/empty
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/llvm/utils/lit/tests/Inputs/shtest-glob/example_dir2.input/empty b/llvm/utils/lit/tests/Inputs/shtest-glob/example_dir2.input/empty
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/llvm/utils/lit/tests/Inputs/shtest-glob/glob-mkdir.txt b/llvm/utils/lit/tests/Inputs/shtest-glob/glob-mkdir.txt
index d1329f5dbfaae..71972411bc4cf 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-glob/glob-mkdir.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-glob/glob-mkdir.txt
@@ -1,2 +1,7 @@
## Tests glob pattern handling in the mkdir command.
+
+## This mkdir should fail because the `example_file*.input`s are regular files.
# RUN: not mkdir %S/example_file*.input
+
+## This mkdir should succeed (so RUN should fail) because the `example_dir*.input`s that already exist are directories.
+# RUN: not mkdir %S/example_dir*.input
diff --git a/llvm/utils/lit/tests/shtest-glob.py b/llvm/utils/lit/tests/shtest-glob.py
index ae90f31907d49..aa4705b634a7d 100644
--- a/llvm/utils/lit/tests/shtest-glob.py
+++ b/llvm/utils/lit/tests/shtest-glob.py
@@ -1,12 +1,13 @@
## Tests glob pattern handling in echo command.
# RUN: not %{lit} -a -v %{inputs}/shtest-glob \
-# RUN: | FileCheck -dump-input=fail -match-full-lines %s
-#
+# RUN: | FileCheck -dump-input=fail -match-full-lines --implicit-check-not=Error: %s
# END.
# CHECK: UNRESOLVED: shtest-glob :: glob-echo.txt ({{[^)]*}})
# CHECK: TypeError: string argument expected, got 'GlobItem'
-# CHECK: FAIL: shtest-glob :: glob-mkdir.txt ({{[^)]*}})
-# CHECK: # error: command failed with exit status: 1
+# CHECK: FAIL: shtest-glob :: glob-mkdir.txt ({{[^)]*}})
+# CHECK: # | Error: 'mkdir' command failed, {{.*}}example_file1.input'
+# CHECK-NEXT: # | Error: 'mkdir' command failed, {{.*}}example_file2.input'
+# CHECK: # error: command failed with exit status: 1
>From 4e82cd7f92c02b05324f4b2e10603b993ae5484e Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 09:05:48 +0900
Subject: [PATCH 2/8] remove to_unicode
---
llvm/utils/lit/lit/TestRunner.py | 9 ++-------
llvm/utils/lit/lit/util.py | 14 --------------
2 files changed, 2 insertions(+), 21 deletions(-)
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index f1346c257ec75..89a71add93458 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -21,7 +21,7 @@
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
-from lit.util import to_bytes, to_string, to_unicode
+from lit.util import to_bytes, to_string
from lit.BooleanExpression import BooleanExpression
@@ -463,7 +463,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
exitCode = 0
for dir in args:
dir = pathlib.Path(dir)
- cwd = pathlib.Path(to_unicode(cmd_shenv.cwd))
+ cwd = pathlib.Path(cmd_shenv.cwd)
if not dir.is_absolute():
dir = lit.util.abs_path_preserve_drive(cwd / dir)
if parent:
@@ -508,8 +508,6 @@ def on_rm_error(func, path, exc_info):
exitCode = 0
for path in args:
cwd = cmd_shenv.cwd
- path = to_unicode(path) if kIsWindows else to_bytes(path)
- cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(path):
path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path))
if force and not os.path.exists(path):
@@ -703,9 +701,6 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
else:
# Make sure relative paths are relative to the cwd.
redir_filename = os.path.join(cmd_shenv.cwd, name)
- redir_filename = (
- to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename)
- )
fd = open(redir_filename, mode)
# Workaround a Win32 and/or subprocess bug when appending.
#
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index e4e031b3e0898..f85cd7fda0317 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -89,20 +89,6 @@ def to_string(b):
raise TypeError("not sure how to convert %s to %s" % (type(b), str))
-def to_unicode(s):
- """Return the parameter as type which supports unicode, possibly decoding
- it.
-
- In Python2, this is the unicode type. In Python3 it's the str type.
-
- """
- if isinstance(s, bytes):
- # In Python2, this branch is taken for both 'str' and 'bytes'.
- # In Python3, this branch is taken only for 'bytes'.
- return s.decode("utf-8")
- return s
-
-
def usable_core_count():
"""Return the number of cores the current process can use, if supported.
Otherwise, return the total number of cores (like `os.cpu_count()`).
>From fa490a657369d203b5d77b92e0934ba098acbb19 Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 09:39:25 +0900
Subject: [PATCH 3/8] remove to_string from the runner
---
llvm/utils/lit/lit/TestRunner.py | 35 ++++++++++++++------------------
llvm/utils/lit/lit/util.py | 6 +++---
2 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 89a71add93458..2cbceae141f89 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -21,7 +21,7 @@
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
-from lit.util import to_bytes, to_string
+from lit.util import to_bytes
from lit.BooleanExpression import BooleanExpression
@@ -391,18 +391,14 @@ def executeBuiltinEcho(cmd, shenv):
# Some tests have un-redirected echo commands to help debug test failures.
# Buffer our output and return it to the caller.
is_redirected = True
- encode = lambda x: x
if stdout == subprocess.PIPE:
is_redirected = False
stdout = StringIO()
elif kIsWindows:
- # Reopen stdout in binary mode to avoid CRLF translation. The versions
- # of echo we are replacing on Windows all emit plain LF, and the LLVM
- # tests now depend on this.
- # When we open as binary, however, this also means that we have to write
- # 'bytes' objects to stdout instead of 'str' objects.
- encode = lit.util.to_bytes
- stdout = open(stdout.name, stdout.mode + "b")
+ # Reopen stdout with specifying `newline` to avoid CRLF translation.
+ # The versions of echo we are replacing on Windows all emit plain LF,
+ # and the LLVM tests now depend on this.
+ stdout = open(stdout.name, stdout.mode, newline='')
opened_files.append((None, None, stdout, None))
# Implement echo flags. We only support -e and -n, and not yet in
@@ -423,16 +419,15 @@ def maybeUnescape(arg):
if not interpret_escapes:
return arg
- arg = lit.util.to_bytes(arg)
- return arg.decode("unicode_escape")
+ return arg.encode("utf-8").decode("unicode_escape")
if args:
for arg in args[:-1]:
- stdout.write(encode(maybeUnescape(arg)))
- stdout.write(encode(" "))
- stdout.write(encode(maybeUnescape(args[-1])))
+ stdout.write(maybeUnescape(arg))
+ stdout.write(" ")
+ stdout.write(maybeUnescape(args[-1]))
if write_newline:
- stdout.write(encode("\n"))
+ stdout.write("\n")
for (name, mode, f, path) in opened_files:
f.close()
@@ -1062,14 +1057,14 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
if out is None:
out = ""
else:
- out = to_string(out.decode("utf-8", errors="replace"))
+ out = out.decode("utf-8", errors="replace")
except:
out = str(out)
try:
if err is None:
err = ""
else:
- err = to_string(err.decode("utf-8", errors="replace"))
+ err = err.decode("utf-8", errors="replace")
except:
err = str(err)
@@ -1261,7 +1256,7 @@ def executeScriptInternal(
# Add the command output, if redirected.
for (name, path, data) in result.outputFiles:
- data = to_string(data.decode("utf-8", errors="replace"))
+ data = data.decode("utf-8", errors="replace")
out += formatOutput(f"redirected output from '{name}'", data, limit=1024)
if result.stdout.strip():
out += formatOutput("command stdout", result.stdout)
@@ -1471,8 +1466,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
keyword, ln = match.groups()
yield (
line_number,
- to_string(keyword.decode("utf-8")),
- to_string(ln.decode("utf-8").rstrip("\r")),
+ keyword.decode("utf-8"),
+ ln.decode("utf-8").rstrip("\r"),
)
finally:
f.close()
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index f85cd7fda0317..ee5823ec0aa27 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -327,7 +327,7 @@ def executeCommand(
"""
if input is not None:
- input = to_bytes(input)
+ input = input.encode("utf-8")
err_out = subprocess.STDOUT if redirect_stderr else subprocess.PIPE
p = subprocess.Popen(
command,
@@ -363,8 +363,8 @@ def killProcess():
timerObject.cancel()
# Ensure the resulting output is always of string type.
- out = to_string(out)
- err = "" if redirect_stderr else to_string(err)
+ out = out.decode("utf-8", errors="replace")
+ err = "" if redirect_stderr else err.decode("utf-8", errors="replace")
if hitTimeOut[0]:
raise ExecuteCommandTimeoutException(
>From 14ca5b6fea26f1d816bcb1c9199fc116077b9cf3 Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 09:46:44 +0900
Subject: [PATCH 4/8] remove all to_string
---
llvm/utils/lit/lit/builtin_commands/diff.py | 6 ++--
llvm/utils/lit/lit/formats/googletest.py | 2 +-
llvm/utils/lit/lit/llvm/config.py | 6 ++--
llvm/utils/lit/lit/util.py | 39 ---------------------
4 files changed, 6 insertions(+), 47 deletions(-)
diff --git a/llvm/utils/lit/lit/builtin_commands/diff.py b/llvm/utils/lit/lit/builtin_commands/diff.py
index f2b5869b35889..a32a31d50ada8 100644
--- a/llvm/utils/lit/lit/builtin_commands/diff.py
+++ b/llvm/utils/lit/lit/builtin_commands/diff.py
@@ -8,7 +8,6 @@
import sys
import util
-from util import to_string
class DiffFlags:
@@ -67,10 +66,9 @@ def compareTwoBinaryFiles(flags, filepaths, filelines):
filepaths[1].encode(),
n=flags.num_context_lines,
)
- diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
for diff in diffs:
- sys.stdout.write(to_string(diff))
+ sys.stdout.write(diff.decode(errors="backslashreplace"))
exitCode = 1
return exitCode
@@ -117,7 +115,7 @@ def compose2(f, g):
filepaths[1],
n=flags.num_context_lines,
):
- sys.stdout.write(to_string(diff))
+ sys.stdout.write(diff)
exitCode = 1
return exitCode
diff --git a/llvm/utils/lit/lit/formats/googletest.py b/llvm/utils/lit/lit/formats/googletest.py
index 172cd0beee4a1..d7fb25c983a65 100644
--- a/llvm/utils/lit/lit/formats/googletest.py
+++ b/llvm/utils/lit/lit/formats/googletest.py
@@ -43,7 +43,7 @@ def get_num_tests(self, path, litConfig, localConfig):
return None
return sum(
map(
- lambda line: lit.util.to_string(line).startswith(" "),
+ lambda line: line.decode("utf-8", errors="replace").startswith(" "),
out.splitlines(False),
)
)
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 913ba69d63328..497009848b563 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -223,7 +223,7 @@ def _find_git_windows_unix_tools(self, tools_needed):
continue
# We found it, stop enumerating.
- return lit.util.to_string(candidate_path)
+ return candidate_path
except:
continue
@@ -284,8 +284,8 @@ def get_process_output(self, command):
env=self.config.environment,
)
stdout, stderr = cmd.communicate()
- stdout = lit.util.to_string(stdout)
- stderr = lit.util.to_string(stderr)
+ stdout = stdout.decode("utf-8", errors="replace")
+ stderr = stderr.decode("utf-8", errors="replace")
return (stdout, stderr)
except OSError:
self.lit_config.fatal("Could not run process %s" % command)
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index ee5823ec0aa27..341f7268f7ea5 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -50,45 +50,6 @@ def to_bytes(s):
return s.encode("utf-8")
-def to_string(b):
- """Return the parameter as type 'str', possibly encoding it.
-
- In Python2, the 'str' type is the same as 'bytes'. In Python3, the
- 'str' type is (essentially) Python2's 'unicode' type, and 'bytes' is
- distinct.
-
- """
- if isinstance(b, str):
- # In Python2, this branch is taken for types 'str' and 'bytes'.
- # In Python3, this branch is taken only for 'str'.
- return b
- if isinstance(b, bytes):
- # In Python2, this branch is never taken ('bytes' is handled as 'str').
- # In Python3, this is true only for 'bytes'.
- try:
- return b.decode("utf-8")
- except UnicodeDecodeError:
- # If the value is not valid Unicode, return the default
- # repr-line encoding.
- return str(b)
-
- # By this point, here's what we *don't* have:
- #
- # - In Python2:
- # - 'str' or 'bytes' (1st branch above)
- # - In Python3:
- # - 'str' (1st branch above)
- # - 'bytes' (2nd branch above)
- #
- # The last type we might expect is the Python2 'unicode' type. There is no
- # 'unicode' type in Python3 (all the Python3 cases were already handled). In
- # order to get a 'str' object, we need to encode the 'unicode' object.
- try:
- return b.encode("utf-8")
- except AttributeError:
- raise TypeError("not sure how to convert %s to %s" % (type(b), str))
-
-
def usable_core_count():
"""Return the number of cores the current process can use, if supported.
Otherwise, return the total number of cores (like `os.cpu_count()`).
>From 347435283d81c3714cd665777dde261c52d23978 Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 09:56:38 +0900
Subject: [PATCH 5/8] remove to_bytes
---
llvm/utils/lit/lit/TestRunner.py | 25 +++++++------------------
llvm/utils/lit/lit/util.py | 17 -----------------
2 files changed, 7 insertions(+), 35 deletions(-)
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 2cbceae141f89..f0fb5ba30ca4b 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -21,7 +21,6 @@
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
-from lit.util import to_bytes
from lit.BooleanExpression import BooleanExpression
@@ -1419,19 +1418,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
(line_number, command_type, line).
"""
- # This code is carefully written to be dual compatible with Python 2.5+ and
- # Python 3 without requiring input files to always have valid codings. The
- # trick we use is to open the file in binary mode and use the regular
- # expression library to find the commands, with it scanning strings in
- # Python2 and bytes in Python3.
- #
- # Once we find a match, we do require each script line to be decodable to
- # UTF-8, so we convert the outputs to UTF-8 before returning. This way the
- # remaining code can work with "strings" agnostic of the executing Python
- # version.
+ # We use `bytes` for scanning input files to avoid requiring them to always
+ # have valid codings.
keywords_re = re.compile(
- to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),))
+ b"(%s)(.*)\n" % (b"|".join(re.escape(k.encode("utf-8")) for k in keywords),)
)
f = open(source_path, "rb")
@@ -1440,8 +1431,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
data = f.read()
# Ensure the data ends with a newline.
- if not data.endswith(to_bytes("\n")):
- data = data + to_bytes("\n")
+ if not data.endswith(b"\n"):
+ data = data + b"\n"
# Iterate over the matches.
line_number = 1
@@ -1451,14 +1442,12 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
# newlines.
match_position = match.start()
line_number += data.count(
- to_bytes("\n"), last_match_position, match_position
+ b"\n", last_match_position, match_position
)
last_match_position = match_position
# Convert the keyword and line to UTF-8 strings and yield the
- # command. Note that we take care to return regular strings in
- # Python 2, to avoid other code having to differentiate between the
- # str and unicode types.
+ # command.
#
# Opening the file in binary mode prevented Windows \r newline
# characters from being converted to Unix \n newlines, so manually
diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py
index 341f7268f7ea5..7815c361a8b7d 100644
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@@ -33,23 +33,6 @@ def make_word_regex(word):
return r"\b" + word + r"\b"
-def to_bytes(s):
- """Return the parameter as type 'bytes', possibly encoding it.
-
- In Python2, the 'bytes' type is the same as 'str'. In Python3, they
- are distinct.
-
- """
- if isinstance(s, bytes):
- # In Python2, this branch is taken for both 'str' and 'bytes'.
- # In Python3, this branch is taken only for 'bytes'.
- return s
- # In Python2, 's' is a 'unicode' object.
- # In Python3, 's' is a 'str' object.
- # Encode to UTF-8 to get 'bytes' data.
- return s.encode("utf-8")
-
-
def usable_core_count():
"""Return the number of cores the current process can use, if supported.
Otherwise, return the total number of cores (like `os.cpu_count()`).
>From f05fa2d592a6233482bf37f5a4ed75d96cbd30eb Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 10:29:35 +0900
Subject: [PATCH 6/8] format
---
llvm/utils/lit/lit/TestRunner.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index f0fb5ba30ca4b..468c3276cc1ce 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -397,7 +397,7 @@ def executeBuiltinEcho(cmd, shenv):
# Reopen stdout with specifying `newline` to avoid CRLF translation.
# The versions of echo we are replacing on Windows all emit plain LF,
# and the LLVM tests now depend on this.
- stdout = open(stdout.name, stdout.mode, newline='')
+ stdout = open(stdout.name, stdout.mode, newline="")
opened_files.append((None, None, stdout, None))
# Implement echo flags. We only support -e and -n, and not yet in
@@ -1441,9 +1441,7 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
# Compute the updated line number by counting the intervening
# newlines.
match_position = match.start()
- line_number += data.count(
- b"\n", last_match_position, match_position
- )
+ line_number += data.count(b"\n", last_match_position, match_position)
last_match_position = match_position
# Convert the keyword and line to UTF-8 strings and yield the
>From dc3f0c747f2722957e71d2c1349cb3ad8d2a142f Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 11:03:16 +0900
Subject: [PATCH 7/8] replace a use in libcxx
---
libcxx/test/selftest/dsl/lit.local.cfg | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/test/selftest/dsl/lit.local.cfg b/libcxx/test/selftest/dsl/lit.local.cfg
index dc6887ad7e48b..73e1c4db9ca4e 100644
--- a/libcxx/test/selftest/dsl/lit.local.cfg
+++ b/libcxx/test/selftest/dsl/lit.local.cfg
@@ -10,6 +10,6 @@
# within the test.
import base64, lit.util, pickle
-base64Encode = lambda s: lit.util.to_string(base64.b64encode(lit.util.to_bytes(s)))
+base64Encode = lambda s: base64.b64encode(s).decode("utf-8")
escapedSubstitutions = base64Encode(pickle.dumps(config.substitutions))
config.substitutions.append(("%{substitutions}", escapedSubstitutions))
>From 10de4e1ae5ab7db632f3a6a570447e6fc588727d Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Sat, 1 Nov 2025 13:08:36 +0900
Subject: [PATCH 8/8] set encoding for open()
---
llvm/utils/lit/lit/TestRunner.py | 32 ++++++++++++--------------------
llvm/utils/lit/lit/reports.py | 4 ++--
2 files changed, 14 insertions(+), 22 deletions(-)
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 468c3276cc1ce..2220354e3fa42 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -397,7 +397,7 @@ def executeBuiltinEcho(cmd, shenv):
# Reopen stdout with specifying `newline` to avoid CRLF translation.
# The versions of echo we are replacing on Windows all emit plain LF,
# and the LLVM tests now depend on this.
- stdout = open(stdout.name, stdout.mode, newline="")
+ stdout = open(stdout.name, stdout.mode, encoding="utf-8", newline="")
opened_files.append((None, None, stdout, None))
# Implement echo flags. We only support -e and -n, and not yet in
@@ -695,7 +695,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
else:
# Make sure relative paths are relative to the cwd.
redir_filename = os.path.join(cmd_shenv.cwd, name)
- fd = open(redir_filename, mode)
+ fd = open(redir_filename, mode, encoding="utf-8")
# Workaround a Win32 and/or subprocess bug when appending.
#
# FIXME: Actually, this is probably an instance of PR6753.
@@ -1311,13 +1311,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
script += ".bat"
# Write script file
- mode = "w"
- open_kwargs = {}
- if litConfig.isWindows and not isWin32CMDEXE:
- mode += "b" # Avoid CRLFs when writing bash scripts.
- else:
- open_kwargs["encoding"] = "utf-8"
- f = open(script, mode, **open_kwargs)
if isWin32CMDEXE:
for i, ln in enumerate(commands):
match = re.fullmatch(kPdbgRegex, ln)
@@ -1326,8 +1319,9 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
commands[i] = match.expand(
"echo '\\1' > nul && " if command else "echo '\\1' > nul"
)
- f.write("@echo on\n")
- f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
+ with open(script, "w", encoding="utf-8") as f:
+ f.write("@echo on\n")
+ f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
else:
for i, ln in enumerate(commands):
match = re.fullmatch(kPdbgRegex, ln)
@@ -1366,8 +1360,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
# seen the latter manage to terminate the shell running lit.
if command:
commands[i] += f" && {{ {command}; }}"
- if test.config.pipefail:
- f.write(b"set -o pipefail;" if mode == "wb" else "set -o pipefail;")
# Manually export any DYLD_* variables used by dyld on macOS because
# otherwise they are lost when the shell executable is run, before the
@@ -1377,14 +1369,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
for k, v in test.config.environment.items()
if k.startswith("DYLD_")
)
- f.write(bytes(env_str, "utf-8") if mode == "wb" else env_str)
- f.write(b"set -x;" if mode == "wb" else "set -x;")
- if mode == "wb":
- f.write(bytes("{ " + "; } &&\n{ ".join(commands) + "; }", "utf-8"))
- else:
+
+ with open(script, "w", encoding="utf-8", newline="") as f:
+ if test.config.pipefail:
+ f.write("set -o pipefail;")
+ f.write(env_str)
+ f.write("set -x;")
f.write("{ " + "; } &&\n{ ".join(commands) + "; }")
- f.write(b"\n" if mode == "wb" else "\n")
- f.close()
+ f.write("\n")
if isWin32CMDEXE:
command = ["cmd", "/c", script]
diff --git a/llvm/utils/lit/lit/reports.py b/llvm/utils/lit/lit/reports.py
index 1b43ab9357b37..6f8a782a40aa8 100755
--- a/llvm/utils/lit/lit/reports.py
+++ b/llvm/utils/lit/lit/reports.py
@@ -29,10 +29,10 @@ def write_results(self, tests, elapsed):
fd, _ = tempfile.mkstemp(
suffix=ext, prefix=f"{filename}.", dir=os.path.dirname(self.output_file)
)
- report_file = os.fdopen(fd, "w")
+ report_file = os.fdopen(fd, "w", encoding="utf-8")
else:
# Overwrite if the results already exist.
- report_file = open(self.output_file, "w")
+ report_file = open(self.output_file, "w", encoding="utf-8")
with report_file:
self._write_results_to_file(tests, elapsed, report_file)
More information about the libcxx-commits
mailing list