[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters

2018-09-17 Thread Maximilian Heinzler via Phabricator via cfe-commits
mheinzler updated this revision to Diff 165728.
mheinzler added a comment.

Sorry, here's the diff for the whole files.

The `b` function defined at the top of the file already does what you suggest:
under Python 2 it returns the string unchanged, and under Python 3 it calls
`encode`. So there shouldn't be any change in behavior at all for Python 2.


Repository:
  rC Clang

https://reviews.llvm.org/D45741

Files:
  bindings/python/clang/cindex.py
  bindings/python/tests/cindex/test_translation_unit.py


Index: bindings/python/tests/cindex/test_translation_unit.py
===
--- bindings/python/tests/cindex/test_translation_unit.py
+++ bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from contextlib import contextmanager
 import gc
 import os
@@ -84,6 +86,16 @@
 spellings = [c.spelling for c in tu.cursor.get_children()]
 self.assertEqual(spellings[-1], 'x')
 
+def test_unsaved_files_encoding(self):
+tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+('fake.c', """
+// 😀
+int x;
+""")
+])
+spellings = [c.spelling for c in tu.cursor.get_children()]
+self.assertEqual(spellings[-1], 'x')
+
 def assert_normpaths_equal(self, path1, path2):
 """ Compares two paths for equality after normalizing them with
 os.path.normpath
Index: bindings/python/clang/cindex.py
===
--- bindings/python/clang/cindex.py
+++ bindings/python/clang/cindex.py
@@ -2803,7 +2803,7 @@
 
 unsaved_array[i].name = b(name)
 unsaved_array[i].contents = b(contents)
-unsaved_array[i].length = len(contents)
+unsaved_array[i].length = len(unsaved_array[i].contents)
 
 ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,
 len(args), unsaved_array,
@@ -2983,9 +2983,10 @@
 print(value)
 if not isinstance(value, str):
 raise TypeError('Unexpected unsaved file contents.')
-unsaved_files_array[i].name = name
-unsaved_files_array[i].contents = value
-unsaved_files_array[i].length = len(value)
+unsaved_files_array[i].name = b(name)
+unsaved_files_array[i].contents = b(value)
+unsaved_files_array[i].length = \
+len(unsaved_files_array[i].contents)
 ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files),
 unsaved_files_array, options)
 
@@ -3049,7 +3050,8 @@
 raise TypeError('Unexpected unsaved file contents.')
 unsaved_files_array[i].name = b(name)
 unsaved_files_array[i].contents = b(value)
-unsaved_files_array[i].length = len(value)
+unsaved_files_array[i].length = \
+len(unsaved_files_array[i].contents)
 ptr = conf.lib.clang_codeCompleteAt(self, path, line, column,
 unsaved_files_array, len(unsaved_files), options)
 if ptr:


Index: bindings/python/tests/cindex/test_translation_unit.py
===
--- bindings/python/tests/cindex/test_translation_unit.py
+++ bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from contextlib import contextmanager
 import gc
 import os
@@ -84,6 +86,16 @@
 spellings = [c.spelling for c in tu.cursor.get_children()]
 self.assertEqual(spellings[-1], 'x')
 
+def test_unsaved_files_encoding(self):
+tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+('fake.c', """
+// 😀
+int x;
+""")
+])
+spellings = [c.spelling for c in tu.cursor.get_children()]
+self.assertEqual(spellings[-1], 'x')
+
 def assert_normpaths_equal(self, path1, path2):
 """ Compares two paths for equality after normalizing them with
 os.path.normpath
Index: bindings/python/clang/cindex.py
===
--- bindings/python/clang/cindex.py
+++ bindings/python/clang/cindex.py
@@ -2803,7 +2803,7 @@
 
 unsaved_array[i].name = b(name)
 unsaved_array[i].contents = b(contents)
-unsaved_array[i].length = len(contents)
+unsaved_array[i].length = len(unsaved_array[i].contents)
 
 ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,
 len(args), unsaved_array,
@@ -2983,9 +2983,10 @@
 print(value)
 if not isinstance(value, str):
 raise TypeError('Unexpected unsaved file contents.')
-uns

[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters

2019-02-03 Thread Maximilian Heinzler via Phabricator via cfe-commits
mheinzler abandoned this revision.
mheinzler added a comment.
Herald added a subscriber: arphaman.
Herald added a reviewer: serge-sans-paille.
Herald added a project: clang.

I'm closing this because it has been fixed in master by:
https://reviews.llvm.org/D56429


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D45741/new/

https://reviews.llvm.org/D45741



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45741: Python bindings: Fix handling of file bodies with multi-byte characters

2018-04-17 Thread Maximilian Heinzler via Phabricator via cfe-commits
mheinzler created this revision.
mheinzler added a reviewer: clang.
Herald added a subscriber: cfe-commits.

With Python 3, the length of a string can differ from the length of its
UTF-8 encoded bytes. To avoid cutting off characters at the end when the
string contains multi-byte characters, the length of the file contents
passed to clang needs to be calculated from their bytes representation.

I also added a test case that catches this. I needed to add the coding line at
the top of the test unit to make Python 2 accept the embedded Unicode
character. Alternatively, we could replace the character with a `\u` escape
sequence, but then there would be other problems with Python 2.


Repository:
  rC Clang

https://reviews.llvm.org/D45741

Files:
  bindings/python/clang/cindex.py
  bindings/python/tests/cindex/test_translation_unit.py


Index: bindings/python/tests/cindex/test_translation_unit.py
===
--- bindings/python/tests/cindex/test_translation_unit.py
+++ bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import gc
 import os
 import tempfile
@@ -83,6 +85,16 @@
 spellings = [c.spelling for c in tu.cursor.get_children()]
 self.assertEqual(spellings[-1], 'x')
 
+def test_unsaved_files_encoding(self):
+tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+('fake.c', """
+// 😀
+int x;
+""")
+])
+spellings = [c.spelling for c in tu.cursor.get_children()]
+self.assertEqual(spellings[-1], 'x')
+
 def assert_normpaths_equal(self, path1, path2):
 """ Compares two paths for equality after normalizing them with
 os.path.normpath
Index: bindings/python/clang/cindex.py
===
--- bindings/python/clang/cindex.py
+++ bindings/python/clang/cindex.py
@@ -2791,7 +2791,7 @@
 
 unsaved_array[i].name = b(name)
 unsaved_array[i].contents = b(contents)
-unsaved_array[i].length = len(contents)
+unsaved_array[i].length = len(unsaved_array[i].contents)
 
 ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,
 len(args), unsaved_array,
@@ -2971,9 +2971,10 @@
 print(value)
 if not isinstance(value, str):
 raise TypeError('Unexpected unsaved file contents.')
-unsaved_files_array[i].name = name
-unsaved_files_array[i].contents = value
-unsaved_files_array[i].length = len(value)
+unsaved_files_array[i].name = b(name)
+unsaved_files_array[i].contents = b(value)
+unsaved_files_array[i].length = \
+len(unsaved_files_array[i].contents)
 ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files),
 unsaved_files_array, options)
 
@@ -3037,7 +3038,8 @@
 raise TypeError('Unexpected unsaved file contents.')
 unsaved_files_array[i].name = b(name)
 unsaved_files_array[i].contents = b(value)
-unsaved_files_array[i].length = len(value)
+unsaved_files_array[i].length = \
+len(unsaved_files_array[i].contents)
 ptr = conf.lib.clang_codeCompleteAt(self, path, line, column,
 unsaved_files_array, len(unsaved_files), options)
 if ptr:


Index: bindings/python/tests/cindex/test_translation_unit.py
===
--- bindings/python/tests/cindex/test_translation_unit.py
+++ bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import gc
 import os
 import tempfile
@@ -83,6 +85,16 @@
 spellings = [c.spelling for c in tu.cursor.get_children()]
 self.assertEqual(spellings[-1], 'x')
 
+def test_unsaved_files_encoding(self):
+tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
+('fake.c', """
+// 😀
+int x;
+""")
+])
+spellings = [c.spelling for c in tu.cursor.get_children()]
+self.assertEqual(spellings[-1], 'x')
+
 def assert_normpaths_equal(self, path1, path2):
 """ Compares two paths for equality after normalizing them with
 os.path.normpath
Index: bindings/python/clang/cindex.py
===
--- bindings/python/clang/cindex.py
+++ bindings/python/clang/cindex.py
@@ -2791,7 +2791,7 @@
 
 unsaved_array[i].name = b(name)
 unsaved_array[i].contents = b(contents)
-unsaved_array[i].length = len(contents)
+unsaved_array[i].length = len(unsaved_array[i].contents)
 
 ptr =