[clang-tools-extra] 307b1fd - [clang-tidy] Always open files using UTF-8 encoding
Andy Yankovsky via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 2 02:37:38 PDT 2021
Author: Andy Yankovsky
Date: 2021-08-02T11:36:04+02:00
New Revision: 307b1fddd4d84b559b154ff7744ae68bf9c6f503
URL: https://github.com/llvm/llvm-project/commit/307b1fddd4d84b559b154ff7744ae68bf9c6f503
DIFF: https://github.com/llvm/llvm-project/commit/307b1fddd4d84b559b154ff7744ae68bf9c6f503.diff
LOG: [clang-tidy] Always open files using UTF-8 encoding
The encoding used for opening files depends on the OS and might be different
from UTF-8 (e.g. on Windows it can be CP-1252). The documentation files use
UTF-8 and might be incompatible with other encodings. For example, right now
`clang-tools-extra/docs/clang-tidy/checks/abseil-no-internal-dependencies.rst`
has non-ASCII quotes and running `add_new_check.py` fails on Windows, because
it tries to read the file with incompatible encoding.
Use `io.open` for compatibility with both Python 2 and Python 3.
Reviewed By: kbobyrev
Differential Revision: https://reviews.llvm.org/D106792
Added:
Modified:
clang-tools-extra/clang-tidy/add_new_check.py
clang-tools-extra/clang-tidy/rename_check.py
Removed:
################################################################################
diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py
index 14fcfe8d49ff..9239ca5953cd 100755
--- a/clang-tools-extra/clang-tidy/add_new_check.py
+++ b/clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
from __future__ import print_function
import argparse
+import io
import os
import re
import sys
-
# Adapts the module's CMakelist file. Returns 'True' if it could add a new
# entry and 'False' if the entry already existed.
def adapt_cmake(module_path, check_name_camel):
filename = os.path.join(module_path, 'CMakeLists.txt')
- with open(filename, 'r') as f:
+
+ # The documentation files are encoded using UTF-8, however on Windows the
+ # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+ # always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+ # writing files here and elsewhere.
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
cpp_file = check_name_camel + '.cpp'
@@ -31,7 +36,7 @@ def adapt_cmake(module_path, check_name_camel):
return False
print('Updating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
cpp_found = False
file_added = False
for line in lines:
@@ -51,7 +56,7 @@ def write_header(module_path, module, namespace, check_name, check_name_camel):
check_name_dashes = module + '-' + check_name
filename = os.path.join(module_path, check_name_camel) + '.h'
print('Creating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
header_guard = ('LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_' + module.upper() + '_'
+ check_name_camel.upper() + '_H')
f.write('//===--- ')
@@ -104,7 +109,7 @@ class %(check_name)s : public ClangTidyCheck {
def write_implementation(module_path, module, namespace, check_name_camel):
filename = os.path.join(module_path, check_name_camel) + '.cpp'
print('Creating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
f.write('//===--- ')
f.write(os.path.basename(filename))
f.write(' - clang-tidy ')
@@ -158,11 +163,11 @@ def adapt_module(module_path, module, check_name, check_name_camel):
lambda p: p.lower() == module.lower() + 'tidymodule.cpp',
os.listdir(module_path)))[0]
filename = os.path.join(module_path, modulecpp)
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
print('Updating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
header_added = False
header_found = False
check_added = False
@@ -217,7 +222,7 @@ def add_release_notes(module_path, module, check_name):
check_name_dashes = module + '-' + check_name
filename = os.path.normpath(os.path.join(module_path,
'../../docs/ReleaseNotes.rst'))
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
lineMatcher = re.compile('New checks')
@@ -225,7 +230,7 @@ def add_release_notes(module_path, module, check_name):
checkMatcher = re.compile('- New :doc:`(.*)')
print('Updating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
note_added = False
header_found = False
add_note_here = False
@@ -271,7 +276,7 @@ def write_test(module_path, module, check_name, test_extension):
filename = os.path.normpath(os.path.join(module_path, '../../test/clang-tidy/checkers',
check_name_dashes + '.' + test_extension))
print('Creating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
f.write("""// RUN: %%check_clang_tidy %%s %(check_name_dashes)s %%t
// FIXME: Add something that triggers the check here.
@@ -307,7 +312,7 @@ def update_checks_list(clang_tidy_path):
docs_dir = os.path.join(clang_tidy_path, '../docs/clang-tidy/checks')
filename = os.path.normpath(os.path.join(docs_dir, 'list.rst'))
# Read the content of the current list.rst file
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
# Get all existing docs
doc_files = list(filter(lambda s: s.endswith('.rst') and s != 'list.rst',
@@ -323,7 +328,7 @@ def has_auto_fix(check_name):
if not os.path.isfile(checkerCode):
return ""
- with open(checkerCode) as f:
+ with io.open(checkerCode, encoding='utf8') as f:
code = f.read()
if 'FixItHint' in code or "ReplacementText" in code or "fixit" in code:
# Some simple heuristics to figure out if a checker has an autofix or not.
@@ -333,7 +338,7 @@ def has_auto_fix(check_name):
def process_doc(doc_file):
check_name = doc_file.replace('.rst', '')
- with open(os.path.join(docs_dir, doc_file), 'r') as doc:
+ with io.open(os.path.join(docs_dir, doc_file), 'r', encoding='utf8') as doc:
content = doc.read()
match = re.search('.*:orphan:.*', content)
@@ -376,7 +381,7 @@ def format_link_alias(doc_file):
checks_alias = map(format_link_alias, doc_files)
print('Updating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
for line in lines:
f.write(line)
if line.strip() == ".. csv-table::":
@@ -397,7 +402,7 @@ def write_docs(module_path, module, check_name):
filename = os.path.normpath(os.path.join(
module_path, '../../docs/clang-tidy/checks/', check_name_dashes + '.rst'))
print('Creating %s...' % filename)
- with open(filename, 'w') as f:
+ with io.open(filename, 'w', encoding='utf8') as f:
f.write(""".. title:: clang-tidy - %(check_name_dashes)s
%(check_name_dashes)s
diff --git a/clang-tools-extra/clang-tidy/rename_check.py b/clang-tools-extra/clang-tidy/rename_check.py
index 2410041fd5d2..0c48634ac62b 100755
--- a/clang-tools-extra/clang-tidy/rename_check.py
+++ b/clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
import argparse
import glob
+import io
import os
import re
-
def replaceInFileRegex(fileName, sFrom, sTo):
if sFrom == sTo:
return
+
+ # The documentation files are encoded using UTF-8, however on Windows the
+ # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+ # always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+ # writing files here and elsewhere.
txt = None
- with open(fileName, "r") as f:
+ with io.open(fileName, 'r', encoding='utf8') as f:
txt = f.read()
txt = re.sub(sFrom, sTo, txt)
print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
- with open(fileName, "w") as f:
+ with io.open(fileName, 'w', encoding='utf8') as f:
f.write(txt)
@@ -31,7 +36,7 @@ def replaceInFile(fileName, sFrom, sTo):
if sFrom == sTo:
return
txt = None
- with open(fileName, "r") as f:
+ with io.open(fileName, 'r', encoding='utf8') as f:
txt = f.read()
if sFrom not in txt:
@@ -39,7 +44,7 @@ def replaceInFile(fileName, sFrom, sTo):
txt = txt.replace(sFrom, sTo)
print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
- with open(fileName, "w") as f:
+ with io.open(fileName, 'w', encoding='utf8') as f:
f.write(txt)
@@ -70,7 +75,7 @@ def fileRename(fileName, sFrom, sTo):
def deleteMatchingLines(fileName, pattern):
lines = None
- with open(fileName, "r") as f:
+ with io.open(fileName, 'r', encoding='utf8') as f:
lines = f.readlines()
not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@ def deleteMatchingLines(fileName, pattern):
print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
print(' ' + ' '.join([l for l in lines if re.search(pattern, l)]))
- with open(fileName, "w") as f:
+ with io.open(fileName, 'w', encoding='utf8') as f:
f.writelines(not_matching_lines)
return True
@@ -101,7 +106,7 @@ def getListOfFiles(clang_tidy_path):
# entry and 'False' if the entry already existed.
def adapt_cmake(module_path, check_name_camel):
filename = os.path.join(module_path, 'CMakeLists.txt')
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@ def adapt_cmake(module_path, check_name_camel):
return False
print('Updating %s...' % filename)
- with open(filename, 'wb') as f:
+ with io.open(filename, 'wb', encoding='utf8') as f:
cpp_found = False
file_added = False
for line in lines:
@@ -130,11 +135,11 @@ def adapt_cmake(module_path, check_name_camel):
def adapt_module(module_path, module, check_name, check_name_camel):
modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
filename = os.path.join(module_path, modulecpp)
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
print('Updating %s...' % filename)
- with open(filename, 'wb') as f:
+ with io.open(filename, 'wb', encoding='utf8') as f:
header_added = False
header_found = False
check_added = False
@@ -169,7 +174,7 @@ def adapt_module(module_path, module, check_name, check_name_camel):
def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
filename = os.path.normpath(os.path.join(clang_tidy_path,
'../docs/ReleaseNotes.rst'))
- with open(filename, 'r') as f:
+ with io.open(filename, 'r', encoding='utf8') as f:
lines = f.readlines()
lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@ def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
checkMatcher = re.compile('- The \'(.*)')
print('Updating %s...' % filename)
- with open(filename, 'wb') as f:
+ with io.open(filename, 'wb', encoding='utf8') as f:
note_added = False
header_found = False
add_note_here = False
More information about the cfe-commits
mailing list