[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters
mheinzler updated this revision to Diff 165728. mheinzler added a comment. Sorry, here's the diff for the whole files. The `b` function defined at the top of the file already does what you suggest. For python2 it returns the string unchanged, for python3 it calls `encode`. So there shouldn't be any change at all for python2. Repository: rC Clang https://reviews.llvm.org/D45741 Files: bindings/python/clang/cindex.py bindings/python/tests/cindex/test_translation_unit.py Index: bindings/python/tests/cindex/test_translation_unit.py === --- bindings/python/tests/cindex/test_translation_unit.py +++ bindings/python/tests/cindex/test_translation_unit.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from contextlib import contextmanager import gc import os @@ -84,6 +86,16 @@ spellings = [c.spelling for c in tu.cursor.get_children()] self.assertEqual(spellings[-1], 'x') +def test_unsaved_files_encoding(self): +tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [ +('fake.c', """ +// 😀 +int x; +""") +]) +spellings = [c.spelling for c in tu.cursor.get_children()] +self.assertEqual(spellings[-1], 'x') + def assert_normpaths_equal(self, path1, path2): """ Compares two paths for equality after normalizing them with os.path.normpath Index: bindings/python/clang/cindex.py === --- bindings/python/clang/cindex.py +++ bindings/python/clang/cindex.py @@ -2803,7 +2803,7 @@ unsaved_array[i].name = b(name) unsaved_array[i].contents = b(contents) -unsaved_array[i].length = len(contents) +unsaved_array[i].length = len(unsaved_array[i].contents) ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array, len(args), unsaved_array, @@ -2983,9 +2983,10 @@ print(value) if not isinstance(value, str): raise TypeError('Unexpected unsaved file contents.') -unsaved_files_array[i].name = name -unsaved_files_array[i].contents = value -unsaved_files_array[i].length = len(value) +unsaved_files_array[i].name = b(name) +unsaved_files_array[i].contents = b(value) 
+unsaved_files_array[i].length = \ +len(unsaved_files_array[i].contents) ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files), unsaved_files_array, options) @@ -3049,7 +3050,8 @@ raise TypeError('Unexpected unsaved file contents.') unsaved_files_array[i].name = b(name) unsaved_files_array[i].contents = b(value) -unsaved_files_array[i].length = len(value) +unsaved_files_array[i].length = \ +len(unsaved_files_array[i].contents) ptr = conf.lib.clang_codeCompleteAt(self, path, line, column, unsaved_files_array, len(unsaved_files), options) if ptr: Index: bindings/python/tests/cindex/test_translation_unit.py === --- bindings/python/tests/cindex/test_translation_unit.py +++ bindings/python/tests/cindex/test_translation_unit.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from contextlib import contextmanager import gc import os @@ -84,6 +86,16 @@ spellings = [c.spelling for c in tu.cursor.get_children()] self.assertEqual(spellings[-1], 'x') +def test_unsaved_files_encoding(self): +tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [ +('fake.c', """ +// 😀 +int x; +""") +]) +spellings = [c.spelling for c in tu.cursor.get_children()] +self.assertEqual(spellings[-1], 'x') + def assert_normpaths_equal(self, path1, path2): """ Compares two paths for equality after normalizing them with os.path.normpath Index: bindings/python/clang/cindex.py === --- bindings/python/clang/cindex.py +++ bindings/python/clang/cindex.py @@ -2803,7 +2803,7 @@ unsaved_array[i].name = b(name) unsaved_array[i].contents = b(contents) -unsaved_array[i].length = len(contents) +unsaved_array[i].length = len(unsaved_array[i].contents) ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array, len(args), unsaved_array, @@ -2983,9 +2983,10 @@ print(value) if not isinstance(value, str): raise TypeError('Unexpected unsaved file contents.') -uns
[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters
mheinzler abandoned this revision. mheinzler added a comment. Herald added a subscriber: arphaman. Herald added a reviewer: serge-sans-paille. Herald added a project: clang. I'm closing this because it has been fixed in master by: https://reviews.llvm.org/D56429 Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D45741/new/ https://reviews.llvm.org/D45741 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters
mheinzler created this revision. mheinzler added a reviewer: clang. Herald added a subscriber: cfe-commits. With python3 there is a difference between the length of a string and the length of its UTF-8 encoded byte array. To avoid cutting off characters at the end when the string contains multi-byte characters, the length of the file contents that gets passed to clang needs to be calculated from their bytes representation. I also added a test case that catches this. I needed to add the coding line at the top of the test file to make python2 work with the embedded Unicode character. Alternatively we could replace the character with a `\u` escape sequence, but then there would be other problems with python2. Repository: rC Clang https://reviews.llvm.org/D45741 Files: bindings/python/clang/cindex.py bindings/python/tests/cindex/test_translation_unit.py Index: bindings/python/tests/cindex/test_translation_unit.py === --- bindings/python/tests/cindex/test_translation_unit.py +++ bindings/python/tests/cindex/test_translation_unit.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import gc import os import tempfile @@ -83,6 +85,16 @@ spellings = [c.spelling for c in tu.cursor.get_children()] self.assertEqual(spellings[-1], 'x') +def test_unsaved_files_encoding(self): +tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [ +('fake.c', """ +// 😀 +int x; +""") +]) +spellings = [c.spelling for c in tu.cursor.get_children()] +self.assertEqual(spellings[-1], 'x') + def assert_normpaths_equal(self, path1, path2): """ Compares two paths for equality after normalizing them with os.path.normpath Index: bindings/python/clang/cindex.py === --- bindings/python/clang/cindex.py +++ bindings/python/clang/cindex.py @@ -2791,7 +2791,7 @@ unsaved_array[i].name = b(name) unsaved_array[i].contents = b(contents) -unsaved_array[i].length = len(contents) +unsaved_array[i].length = len(unsaved_array[i].contents) ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array, len(args), unsaved_array, @@ 
-2971,9 +2971,10 @@ print(value) if not isinstance(value, str): raise TypeError('Unexpected unsaved file contents.') -unsaved_files_array[i].name = name -unsaved_files_array[i].contents = value -unsaved_files_array[i].length = len(value) +unsaved_files_array[i].name = b(name) +unsaved_files_array[i].contents = b(value) +unsaved_files_array[i].length = \ +len(unsaved_files_array[i].contents) ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files), unsaved_files_array, options) @@ -3037,7 +3038,8 @@ raise TypeError('Unexpected unsaved file contents.') unsaved_files_array[i].name = b(name) unsaved_files_array[i].contents = b(value) -unsaved_files_array[i].length = len(value) +unsaved_files_array[i].length = \ +len(unsaved_files_array[i].contents) ptr = conf.lib.clang_codeCompleteAt(self, path, line, column, unsaved_files_array, len(unsaved_files), options) if ptr: Index: bindings/python/tests/cindex/test_translation_unit.py === --- bindings/python/tests/cindex/test_translation_unit.py +++ bindings/python/tests/cindex/test_translation_unit.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import gc import os import tempfile @@ -83,6 +85,16 @@ spellings = [c.spelling for c in tu.cursor.get_children()] self.assertEqual(spellings[-1], 'x') +def test_unsaved_files_encoding(self): +tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [ +('fake.c', """ +// 😀 +int x; +""") +]) +spellings = [c.spelling for c in tu.cursor.get_children()] +self.assertEqual(spellings[-1], 'x') + def assert_normpaths_equal(self, path1, path2): """ Compares two paths for equality after normalizing them with os.path.normpath Index: bindings/python/clang/cindex.py === --- bindings/python/clang/cindex.py +++ bindings/python/clang/cindex.py @@ -2791,7 +2791,7 @@ unsaved_array[i].name = b(name) unsaved_array[i].contents = b(contents) -unsaved_array[i].length = len(contents) +unsaved_array[i].length = len(unsaved_array[i].contents) ptr =