https://github.com/python/cpython/commit/812ef66759f9fe27d68283d8e67d6cd3eb512be2
commit: 812ef66759f9fe27d68283d8e67d6cd3eb512be2
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-26T11:30:08+02:00
summary:
gh-145202: Fix crash in unicodedata's GraphemeBreakIterator and Segment (GH-145216)
Remove the tp_clear slots and make Segment members read-only.
Also add tests for reference loops involving GraphemeBreakIterator
and Segment.
files:
M Lib/test/test_unicodedata.py
M Modules/unicodedata.c
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 30a26751d3ac54..8ecb0df2f8e5dd 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -12,7 +12,9 @@
import sys
import unicodedata
import unittest
+import weakref
from test.support import (
+ gc_collect,
open_urlresource,
requires_resource,
script_helper,
@@ -1338,6 +1340,28 @@ def run_grapheme_break_tests(self, testdata):
self.assertEqual([x.start for x in result], breaks[i:-1],
comment)
self.assertEqual([x.end for x in result], breaks[i+1:],
comment)
+ def test_reference_loops(self):
+ # Test that reference loops involving GraphemeBreakIterator or
+ # Segment can be broken by the garbage collector.
+ class S(str):
+ pass
+
+ s = S('abc')
+ s.ref = unicodedata.iter_graphemes(s)
+ wr = weakref.ref(s)
+ del s
+ self.assertIsNotNone(wr())
+ gc_collect()
+ self.assertIsNone(wr())
+
+ s = S('abc')
+ s.ref = next(unicodedata.iter_graphemes(s))
+ wr = weakref.ref(s)
+ del s
+ self.assertIsNotNone(wr())
+ gc_collect()
+ self.assertIsNone(wr())
+
if __name__ == "__main__":
unittest.main()
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 401f64e7416944..2c67c23d98ed81 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1925,13 +1925,6 @@ Segment_traverse(PyObject *self, visitproc visit, void *arg)
return 0;
}
-static int
-Segment_clear(PyObject *self)
-{
- Py_CLEAR(((SegmentObject *)self)->string);
- return 0;
-}
-
static PyObject *
Segment_str(PyObject *self)
{
@@ -1947,9 +1940,9 @@ Segment_repr(PyObject *self)
}
static PyMemberDef Segment_members[] = {
- {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), 0,
+ {"start", Py_T_PYSSIZET, offsetof(SegmentObject, start), Py_READONLY,
PyDoc_STR("grapheme start")},
- {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), 0,
+ {"end", Py_T_PYSSIZET, offsetof(SegmentObject, end), Py_READONLY,
PyDoc_STR("grapheme end")},
{NULL} /* Sentinel */
};
@@ -1957,7 +1950,6 @@ static PyMemberDef Segment_members[] = {
static PyType_Slot Segment_slots[] = {
{Py_tp_dealloc, Segment_dealloc},
{Py_tp_traverse, Segment_traverse},
- {Py_tp_clear, Segment_clear},
{Py_tp_str, Segment_str},
{Py_tp_repr, Segment_repr},
{Py_tp_members, Segment_members},
@@ -2001,13 +1993,6 @@ GBI_traverse(PyObject *self, visitproc visit, void *arg)
return 0;
}
-static int
-GBI_clear(PyObject *self)
-{
- Py_CLEAR(((GraphemeBreakIterator *)self)->iter.str);
- return 0;
-}
-
static PyObject *
GBI_iternext(PyObject *self)
{
@@ -2038,7 +2023,6 @@ static PyType_Slot GraphemeBreakIterator_slots[] = {
{Py_tp_iter, PyObject_SelfIter},
{Py_tp_iternext, GBI_iternext},
{Py_tp_traverse, GBI_traverse},
- {Py_tp_clear, GBI_clear},
{0, 0},
};
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]