[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters
Maximilian Heinzler via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 17 15:10:14 PDT 2018
mheinzler created this revision.
mheinzler added a reviewer: clang.
Herald added a subscriber: cfe-commits.
With Python 3 there is a difference between the length of a string and the length of its UTF-8 encoded byte array. To avoid cutting off characters at the end when the string contains multi-byte characters, the length of the file contents passed to clang needs to be calculated from their byte representation.
I also added a test case that catches this. I needed to add the coding line at the top of the unit test file to make Python 2 work with the embedded Unicode character. Alternatively, we could replace the character with a \uXXXX escape, but then there would be other problems with Python 2.
Repository:
rC Clang
https://reviews.llvm.org/D45741
Files:
bindings/python/clang/cindex.py
bindings/python/tests/cindex/test_translation_unit.py
Index: bindings/python/tests/cindex/test_translation_unit.py
===================================================================
--- bindings/python/tests/cindex/test_translation_unit.py
+++ bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import gc
import os
import tempfile
@@ -83,6 +85,16 @@
spellings = [c.spelling for c in tu.cursor.get_children()]
self.assertEqual(spellings[-1], 'x')
+ def test_unsaved_files_encoding(self):
+ tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+ ('fake.c', """
+// 😀
+int x;
+""")
+ ])
+ spellings = [c.spelling for c in tu.cursor.get_children()]
+ self.assertEqual(spellings[-1], 'x')
+
def assert_normpaths_equal(self, path1, path2):
""" Compares two paths for equality after normalizing them with
os.path.normpath
Index: bindings/python/clang/cindex.py
===================================================================
--- bindings/python/clang/cindex.py
+++ bindings/python/clang/cindex.py
@@ -2791,7 +2791,7 @@
unsaved_array[i].name = b(name)
unsaved_array[i].contents = b(contents)
- unsaved_array[i].length = len(contents)
+ unsaved_array[i].length = len(unsaved_array[i].contents)
ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,
len(args), unsaved_array,
@@ -2971,9 +2971,10 @@
print(value)
if not isinstance(value, str):
raise TypeError('Unexpected unsaved file contents.')
- unsaved_files_array[i].name = name
- unsaved_files_array[i].contents = value
- unsaved_files_array[i].length = len(value)
+ unsaved_files_array[i].name = b(name)
+ unsaved_files_array[i].contents = b(value)
+ unsaved_files_array[i].length = \
+ len(unsaved_files_array[i].contents)
ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files),
unsaved_files_array, options)
@@ -3037,7 +3038,8 @@
raise TypeError('Unexpected unsaved file contents.')
unsaved_files_array[i].name = b(name)
unsaved_files_array[i].contents = b(value)
- unsaved_files_array[i].length = len(value)
+ unsaved_files_array[i].length = \
+ len(unsaved_files_array[i].contents)
ptr = conf.lib.clang_codeCompleteAt(self, path, line, column,
unsaved_files_array, len(unsaved_files), options)
if ptr:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45741.142842.patch
Type: text/x-patch
Size: 2753 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180417/f1eae107/attachment.bin>
More information about the cfe-commits
mailing list