[Python-checkins] [3.14] gh-144545: Improve handling of default values in Argument Clinic (GH-146016) (GH-146052)

serhiy-storchaka Tue, 17 Mar 2026 03:55:46 -0700

https://github.com/python/cpython/commit/a005f323b7c8a7c9cd06b74d02a2d3bd7134841c
commit: a005f323b7c8a7c9cd06b74d02a2d3bd7134841c
branch: 3.14
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-03-17T10:55:15Z
summary:


[3.14] gh-144545: Improve handling of default values in Argument Clinic 
(GH-146016) (GH-146052)

* Add the c_init_default attribute which is used to initialize the C variable
  if the default is not explicitly provided.
* Add the c_default_init() method which is used to derive c_default from
  default if c_default is not explicitly provided.
* Explicit c_default and py_default now almost always take precedence
  over the generated value.
* Add support for bytes literals as default values.
* Improve support for str literals as default values (support non-ASCII
  and non-printable characters and special characters like backslash or quotes).
* Fix support for str and bytes literals containing trigraphs, "/*" and "*/".
* Improve support for default values in converters "char" and
  "int(accept={str})".
* Converter "int(accept={str})" now requires 1-character string instead of
  integer as default value.
* Add support for non-None default values in converter "Py_buffer": NULL,
  str and bytes literals.
* Improve error handling for invalid default values.
* Rename Null to NullType for consistency.
(cherry picked from commit 99e2c5eccd2b83ac955125522a952a4ff5c7eb43)

files:
M Lib/test/clinic.test.c
M Lib/test/test_clinic.py
M Modules/_testclinic.c
M Modules/blake2module.c
M Modules/clinic/_testclinic.c.h
M Modules/clinic/blake2module.c.h
M Modules/clinic/zlibmodule.c.h
M Modules/posixmodule.c
M Modules/zlibmodule.c
M Objects/unicodeobject.c
M Tools/c-analyzer/cpython/_parser.py
M Tools/clinic/libclinic/__init__.py
M Tools/clinic/libclinic/clanguage.py
M Tools/clinic/libclinic/converter.py
M Tools/clinic/libclinic/converters.py
M Tools/clinic/libclinic/dsl_parser.py
M Tools/clinic/libclinic/formatting.py
M Tools/clinic/libclinic/utils.py

diff --git a/Lib/test/clinic.test.c b/Lib/test/clinic.test.c
index 4a67fcd2c3e9b3..4cec427dbaa885 100644
--- a/Lib/test/clinic.test.c
+++ b/Lib/test/clinic.test.c
@@ -530,19 +530,19 @@ test_char_converter(PyObject *module, PyObject *const 
*args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     char a = 'A';
-    char b = '\x07';
-    char c = '\x08';
+    char b = '\a';
+    char c = '\b';
     char d = '\t';
     char e = '\n';
-    char f = '\x0b';
-    char g = '\x0c';
+    char f = '\v';
+    char g = '\f';
     char h = '\r';
     char i = '"';
     char j = '\'';
     char k = '?';
     char l = '\\';
-    char m = '\x00';
-    char n = '\xff';
+    char m = '\0';
+    char n = '\377';
 
     if (!_PyArg_CheckPositional("test_char_converter", nargs, 0, 14)) {
         goto exit;
@@ -936,7 +936,7 @@ static PyObject *
 test_char_converter_impl(PyObject *module, char a, char b, char c, char d,
                          char e, char f, char g, char h, char i, char j,
                          char k, char l, char m, char n)
-/*[clinic end generated code: output=ff11e203248582df input=e42330417a44feac]*/
+/*[clinic end generated code: output=6503d15448e1d4c4 input=e42330417a44feac]*/
 
 
 /*[clinic input]
@@ -1173,14 +1173,14 @@ test_int_converter
 
     a: int = 12
     b: int(accept={int}) = 34
-    c: int(accept={str}) = 45
+    c: int(accept={str}) = '-'
     d: int(type='myenum') = 67
     /
 
 [clinic start generated code]*/
 
 PyDoc_STRVAR(test_int_converter__doc__,
-"test_int_converter($module, a=12, b=34, c=45, d=67, /)\n"
+"test_int_converter($module, a=12, b=34, c=\'-\', d=67, /)\n"
 "--\n"
 "\n");
 
@@ -1196,7 +1196,7 @@ test_int_converter(PyObject *module, PyObject *const 
*args, Py_ssize_t nargs)
     PyObject *return_value = NULL;
     int a = 12;
     int b = 34;
-    int c = 45;
+    int c = '-';
     myenum d = 67;
 
     if (!_PyArg_CheckPositional("test_int_converter", nargs, 0, 4)) {
@@ -1247,7 +1247,7 @@ test_int_converter(PyObject *module, PyObject *const 
*args, Py_ssize_t nargs)
 
 static PyObject *
 test_int_converter_impl(PyObject *module, int a, int b, int c, myenum d)
-/*[clinic end generated code: output=fbcfb7554688663d input=d20541fc1ca0553e]*/
+/*[clinic end generated code: output=d5357b563bdb8789 input=5d8f4eb5899b24de]*/
 
 
 /*[clinic input]
diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py
index b25108e0ff7903..73bb942af7c0a1 100644
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@@ -1044,6 +1044,187 @@ def test_param_with_continuations(self):
         p = function.parameters['follow_symlinks']
         self.assertEqual(True, p.default)
 
+    def test_param_default_none(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                obj: object = None
+                str: str(accept={str, NoneType}) = None
+                buf: Py_buffer(accept={str, buffer, NoneType}) = None
+            """)
+        p = function.parameters['obj']
+        self.assertIs(p.default, None)
+        self.assertEqual(p.converter.py_default, 'None')
+        self.assertEqual(p.converter.c_default, 'Py_None')
+
+        p = function.parameters['str']
+        self.assertIs(p.default, None)
+        self.assertEqual(p.converter.py_default, 'None')
+        self.assertEqual(p.converter.c_default, 'NULL')
+
+        p = function.parameters['buf']
+        self.assertIs(p.default, None)
+        self.assertEqual(p.converter.py_default, 'None')
+        self.assertEqual(p.converter.c_default, '{NULL, NULL}')
+
+    def test_param_default_null(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                obj: object = NULL
+                str: str = NULL
+                buf: Py_buffer = NULL
+                fsencoded: unicode_fs_encoded = NULL
+                fsdecoded: unicode_fs_decoded = NULL
+            """)
+        p = function.parameters['obj']
+        self.assertIs(p.default, NULL)
+        self.assertEqual(p.converter.py_default, '<unrepresentable>')
+        self.assertEqual(p.converter.c_default, 'NULL')
+
+        p = function.parameters['str']
+        self.assertIs(p.default, NULL)
+        self.assertEqual(p.converter.py_default, '<unrepresentable>')
+        self.assertEqual(p.converter.c_default, 'NULL')
+
+        p = function.parameters['buf']
+        self.assertIs(p.default, NULL)
+        self.assertEqual(p.converter.py_default, '<unrepresentable>')
+        self.assertEqual(p.converter.c_default, '{NULL, NULL}')
+
+        p = function.parameters['fsencoded']
+        self.assertIs(p.default, NULL)
+        self.assertEqual(p.converter.py_default, '<unrepresentable>')
+        self.assertEqual(p.converter.c_default, 'NULL')
+
+        p = function.parameters['fsdecoded']
+        self.assertIs(p.default, NULL)
+        self.assertEqual(p.converter.py_default, '<unrepresentable>')
+        self.assertEqual(p.converter.c_default, 'NULL')
+
+    def test_param_default_str_literal(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                str: str = ' \t\n\r\v\f\xa0'
+                buf: Py_buffer(accept={str, buffer}) = ' \t\n\r\v\f\xa0'
+            """)
+        p = function.parameters['str']
+        self.assertEqual(p.default, ' \t\n\r\v\f\xa0')
+        self.assertEqual(p.converter.py_default, r"' \t\n\r\x0b\x0c\xa0'")
+        self.assertEqual(p.converter.c_default, r'" \t\n\r\v\f\u00a0"')
+
+        p = function.parameters['buf']
+        self.assertEqual(p.default, ' \t\n\r\v\f\xa0')
+        self.assertEqual(p.converter.py_default, r"' \t\n\r\x0b\x0c\xa0'")
+        self.assertEqual(p.converter.c_default,
+                         r'{.buf = " \t\n\r\v\f\302\240", .obj = NULL, .len = 
8}')
+
+    def test_param_default_bytes_literal(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                str: str(accept={robuffer}) = b' \t\n\r\v\f\xa0'
+                buf: Py_buffer = b' \t\n\r\v\f\xa0'
+            """)
+        p = function.parameters['str']
+        self.assertEqual(p.default, b' \t\n\r\v\f\xa0')
+        self.assertEqual(p.converter.py_default, r"b' \t\n\r\x0b\x0c\xa0'")
+        self.assertEqual(p.converter.c_default, r'" \t\n\r\v\f\240"')
+
+        p = function.parameters['buf']
+        self.assertEqual(p.default, b' \t\n\r\v\f\xa0')
+        self.assertEqual(p.converter.py_default, r"b' \t\n\r\x0b\x0c\xa0'")
+        self.assertEqual(p.converter.c_default,
+                         r'{.buf = " \t\n\r\v\f\240", .obj = NULL, .len = 7}')
+
+    def test_param_default_byte_literal(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                zero: char = b'\0'
+                one: char = b'\1'
+                lf: char = b'\n'
+                nbsp: char = b'\xa0'
+            """)
+        p = function.parameters['zero']
+        self.assertEqual(p.default, b'\0')
+        self.assertEqual(p.converter.py_default, r"b'\x00'")
+        self.assertEqual(p.converter.c_default, r"'\0'")
+
+        p = function.parameters['one']
+        self.assertEqual(p.default, b'\1')
+        self.assertEqual(p.converter.py_default, r"b'\x01'")
+        self.assertEqual(p.converter.c_default, r"'\001'")
+
+        p = function.parameters['lf']
+        self.assertEqual(p.default, b'\n')
+        self.assertEqual(p.converter.py_default, r"b'\n'")
+        self.assertEqual(p.converter.c_default, r"'\n'")
+
+        p = function.parameters['nbsp']
+        self.assertEqual(p.default, b'\xa0')
+        self.assertEqual(p.converter.py_default, r"b'\xa0'")
+        self.assertEqual(p.converter.c_default, r"'\240'")
+
+    def test_param_default_unicode_char(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                zero: int(accept={str}) = '\0'
+                one: int(accept={str}) = '\1'
+                lf: int(accept={str}) = '\n'
+                nbsp: int(accept={str}) = '\xa0'
+                snake: int(accept={str}) = '\U0001f40d'
+            """)
+        p = function.parameters['zero']
+        self.assertEqual(p.default, '\0')
+        self.assertEqual(p.converter.py_default, r"'\x00'")
+        self.assertEqual(p.converter.c_default, '0')
+
+        p = function.parameters['one']
+        self.assertEqual(p.default, '\1')
+        self.assertEqual(p.converter.py_default, r"'\x01'")
+        self.assertEqual(p.converter.c_default, '0x01')
+
+        p = function.parameters['lf']
+        self.assertEqual(p.default, '\n')
+        self.assertEqual(p.converter.py_default, r"'\n'")
+        self.assertEqual(p.converter.c_default, r"'\n'")
+
+        p = function.parameters['nbsp']
+        self.assertEqual(p.default, '\xa0')
+        self.assertEqual(p.converter.py_default, r"'\xa0'")
+        self.assertEqual(p.converter.c_default, '0xa0')
+
+        p = function.parameters['snake']
+        self.assertEqual(p.default, '\U0001f40d')
+        self.assertEqual(p.converter.py_default, "'\U0001f40d'")
+        self.assertEqual(p.converter.c_default, '0x1f40d')
+
+    def test_param_default_bool(self):
+        function = self.parse_function(r"""
+            module test
+            test.func
+                bool: bool = True
+                intbool: bool(accept={int}) = True
+                intbool2: bool(accept={int}) = 2
+            """)
+        p = function.parameters['bool']
+        self.assertIs(p.default, True)
+        self.assertEqual(p.converter.py_default, 'True')
+        self.assertEqual(p.converter.c_default, '1')
+
+        p = function.parameters['intbool']
+        self.assertIs(p.default, True)
+        self.assertEqual(p.converter.py_default, 'True')
+        self.assertEqual(p.converter.c_default, '1')
+
+        p = function.parameters['intbool2']
+        self.assertEqual(p.default, 2)
+        self.assertEqual(p.converter.py_default, '2')
+        self.assertEqual(p.converter.c_default, '2')
+
     def test_param_default_expr_named_constant(self):
         function = self.parse_function("""
             module os
@@ -4209,6 +4390,56 @@ def test_format_escape(self):
         out = libclinic.format_escape(line)
         self.assertEqual(out, expected)
 
+    def test_c_bytes_repr(self):
+        c_bytes_repr = libclinic.c_bytes_repr
+        self.assertEqual(c_bytes_repr(b''), '""')
+        self.assertEqual(c_bytes_repr(b'abc'), '"abc"')
+        self.assertEqual(c_bytes_repr(b'\a\b\f\n\r\t\v'), r'"\a\b\f\n\r\t\v"')
+        self.assertEqual(c_bytes_repr(b' \0\x7f'), r'" \000\177"')
+        self.assertEqual(c_bytes_repr(b'"'), r'"\""')
+        self.assertEqual(c_bytes_repr(b"'"), r'''"'"''')
+        self.assertEqual(c_bytes_repr(b'\\'), r'"\\"')
+        self.assertEqual(c_bytes_repr(b'??/'), r'"?\?/"')
+        self.assertEqual(c_bytes_repr(b'???/'), r'"?\?\?/"')
+        self.assertEqual(c_bytes_repr(b'/*****/ /*/ */*'), r'"/\*****\/ /\*\/ 
*\/\*"')
+        self.assertEqual(c_bytes_repr(b'\xa0'), r'"\240"')
+        self.assertEqual(c_bytes_repr(b'\xff'), r'"\377"')
+
+    def test_c_str_repr(self):
+        c_str_repr = libclinic.c_str_repr
+        self.assertEqual(c_str_repr(''), '""')
+        self.assertEqual(c_str_repr('abc'), '"abc"')
+        self.assertEqual(c_str_repr('\a\b\f\n\r\t\v'), r'"\a\b\f\n\r\t\v"')
+        self.assertEqual(c_str_repr(' \0\x7f'), r'" \000\177"')
+        self.assertEqual(c_str_repr('"'), r'"\""')
+        self.assertEqual(c_str_repr("'"), r'''"'"''')
+        self.assertEqual(c_str_repr('\\'), r'"\\"')
+        self.assertEqual(c_str_repr('??/'), r'"?\?/"')
+        self.assertEqual(c_str_repr('???/'), r'"?\?\?/"')
+        self.assertEqual(c_str_repr('/*****/ /*/ */*'), r'"/\*****\/ /\*\/ 
*\/\*"')
+        self.assertEqual(c_str_repr('\xa0'), r'"\u00a0"')
+        self.assertEqual(c_str_repr('\xff'), r'"\u00ff"')
+        self.assertEqual(c_str_repr('\u20ac'), r'"\u20ac"')
+        self.assertEqual(c_str_repr('\U0001f40d'), r'"\U0001f40d"')
+
+    def test_c_unichar_repr(self):
+        c_unichar_repr = libclinic.c_unichar_repr
+        self.assertEqual(c_unichar_repr('a'), "'a'")
+        self.assertEqual(c_unichar_repr('\n'), r"'\n'")
+        self.assertEqual(c_unichar_repr('\b'), r"'\b'")
+        self.assertEqual(c_unichar_repr('\0'), '0')
+        self.assertEqual(c_unichar_repr('\1'), '0x01')
+        self.assertEqual(c_unichar_repr('\x7f'), '0x7f')
+        self.assertEqual(c_unichar_repr(' '), "' '")
+        self.assertEqual(c_unichar_repr('"'), """'"'""")
+        self.assertEqual(c_unichar_repr("'"), r"'\''")
+        self.assertEqual(c_unichar_repr('\\'), r"'\\'")
+        self.assertEqual(c_unichar_repr('?'), "'?'")
+        self.assertEqual(c_unichar_repr('\xa0'), '0xa0')
+        self.assertEqual(c_unichar_repr('\xff'), '0xff')
+        self.assertEqual(c_unichar_repr('\u20ac'), '0x20ac')
+        self.assertEqual(c_unichar_repr('\U0001f40d'), '0x1f40d')
+
     def test_indent_all_lines(self):
         # Blank lines are expected to be unchanged.
         self.assertEqual(libclinic.indent_all_lines("", prefix="bar"), "")
diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c
index 3e903b6d87d89f..1d23198dac52b2 100644
--- a/Modules/_testclinic.c
+++ b/Modules/_testclinic.c
@@ -334,14 +334,14 @@ int_converter
 
     a: int = 12
     b: int(accept={int}) = 34
-    c: int(accept={str}) = 45
+    c: int(accept={str}) = '-'
     /
 
 [clinic start generated code]*/
 
 static PyObject *
 int_converter_impl(PyObject *module, int a, int b, int c)
-/*[clinic end generated code: output=8e56b59be7d0c306 input=a1dbc6344853db7a]*/
+/*[clinic end generated code: output=8e56b59be7d0c306 input=9a306d4dc907e339]*/
 {
     RETURN_PACKED_ARGS(3, PyLong_FromLong, long, a, b, c);
 }
@@ -1360,6 +1360,7 @@ clone_f2_impl(PyObject *module, const char *path)
 class custom_t_converter(CConverter):
     type = 'custom_t'
     converter = 'custom_converter'
+    c_init_default = "<placeholder>"  # overridden in pre_render(()
 
     def pre_render(self):
         self.c_default = f'''{{
@@ -1367,7 +1368,7 @@ class custom_t_converter(CConverter):
         }}'''
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=b2fb801e99a06bf6]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=78fe84e5ecc0481b]*/
 
 
 /*[clinic input]
diff --git a/Modules/blake2module.c b/Modules/blake2module.c
index ae37e2d3383f9b..e31fa8131f1ecf 100644
--- a/Modules/blake2module.c
+++ b/Modules/blake2module.c
@@ -658,9 +658,9 @@ _blake2.blake2b.__new__ as py_blake2b_new
     data as data_obj: object(c_default="NULL") = b''
     *
     digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = 
_blake2.blake2b.MAX_DIGEST_SIZE
-    key: Py_buffer(c_default="NULL", py_default="b''") = None
-    salt: Py_buffer(c_default="NULL", py_default="b''") = None
-    person: Py_buffer(c_default="NULL", py_default="b''") = None
+    key: Py_buffer = b''
+    salt: Py_buffer = b''
+    person: Py_buffer = b''
     fanout: int = 1
     depth: int = 1
     leaf_size: unsigned_long = 0
@@ -681,7 +681,7 @@ py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, 
int digest_size,
                     unsigned long long node_offset, int node_depth,
                     int inner_size, int last_node, int usedforsecurity,
                     PyObject *string)
-/*[clinic end generated code: output=de64bd850606b6a0 input=78cf60a2922d2f90]*/
+/*[clinic end generated code: output=de64bd850606b6a0 input=32832fb37d13c03d]*/
 {
     PyObject *data;
     if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) {
@@ -696,9 +696,9 @@ _blake2.blake2s.__new__ as py_blake2s_new
     data as data_obj: object(c_default="NULL") = b''
     *
     digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = 
_blake2.blake2s.MAX_DIGEST_SIZE
-    key: Py_buffer(c_default="NULL", py_default="b''") = None
-    salt: Py_buffer(c_default="NULL", py_default="b''") = None
-    person: Py_buffer(c_default="NULL", py_default="b''") = None
+    key: Py_buffer = b''
+    salt: Py_buffer = b''
+    person: Py_buffer = b''
     fanout: int = 1
     depth: int = 1
     leaf_size: unsigned_long = 0
@@ -719,7 +719,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, 
int digest_size,
                     unsigned long long node_offset, int node_depth,
                     int inner_size, int last_node, int usedforsecurity,
                     PyObject *string)
-/*[clinic end generated code: output=582a0c4295cc3a3c input=6843d6332eefd295]*/
+/*[clinic end generated code: output=582a0c4295cc3a3c input=da467fc9dae646bb]*/
 {
     PyObject *data;
     if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) {
diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h
index 970528ce9ea46d..b652634892c27f 100644
--- a/Modules/clinic/_testclinic.c.h
+++ b/Modules/clinic/_testclinic.c.h
@@ -273,19 +273,19 @@ char_converter(PyObject *module, PyObject *const *args, 
Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
     char a = 'A';
-    char b = '\x07';
-    char c = '\x08';
+    char b = '\a';
+    char c = '\b';
     char d = '\t';
     char e = '\n';
-    char f = '\x0b';
-    char g = '\x0c';
+    char f = '\v';
+    char g = '\f';
     char h = '\r';
     char i = '"';
     char j = '\'';
     char k = '?';
     char l = '\\';
-    char m = '\x00';
-    char n = '\xff';
+    char m = '\0';
+    char n = '\377';
 
     if (!_PyArg_CheckPositional("char_converter", nargs, 0, 14)) {
         goto exit;
@@ -860,7 +860,7 @@ unsigned_short_converter(PyObject *module, PyObject *const 
*args, Py_ssize_t nar
 }
 
 PyDoc_STRVAR(int_converter__doc__,
-"int_converter($module, a=12, b=34, c=45, /)\n"
+"int_converter($module, a=12, b=34, c=\'-\', /)\n"
 "--\n"
 "\n");
 
@@ -876,7 +876,7 @@ int_converter(PyObject *module, PyObject *const *args, 
Py_ssize_t nargs)
     PyObject *return_value = NULL;
     int a = 12;
     int b = 34;
-    int c = 45;
+    int c = '-';
 
     if (!_PyArg_CheckPositional("int_converter", nargs, 0, 3)) {
         goto exit;
@@ -4481,4 +4481,4 @@ 
_testclinic_TestClass_posonly_poskw_varpos_array_no_fastcall(PyObject *type, PyO
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=84ffc31f27215baa input=a9049054013a1b77]*/
+/*[clinic end generated code: output=8af194d826d6740d input=a9049054013a1b77]*/
diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h
index 9e9cd56e569b24..556f344e34740b 100644
--- a/Modules/clinic/blake2module.c.h
+++ b/Modules/clinic/blake2module.c.h
@@ -63,9 +63,9 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject 
*kwargs)
     Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
     PyObject *data_obj = NULL;
     int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES;
-    Py_buffer key = {NULL, NULL};
-    Py_buffer salt = {NULL, NULL};
-    Py_buffer person = {NULL, NULL};
+    Py_buffer key = {.buf = "", .obj = NULL, .len = 0};
+    Py_buffer salt = {.buf = "", .obj = NULL, .len = 0};
+    Py_buffer person = {.buf = "", .obj = NULL, .len = 0};
     int fanout = 1;
     int depth = 1;
     unsigned long leaf_size = 0;
@@ -272,9 +272,9 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject 
*kwargs)
     Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
     PyObject *data_obj = NULL;
     int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES;
-    Py_buffer key = {NULL, NULL};
-    Py_buffer salt = {NULL, NULL};
-    Py_buffer person = {NULL, NULL};
+    Py_buffer key = {.buf = "", .obj = NULL, .len = 0};
+    Py_buffer salt = {.buf = "", .obj = NULL, .len = 0};
+    Py_buffer person = {.buf = "", .obj = NULL, .len = 0};
     int fanout = 1;
     int depth = 1;
     unsigned long leaf_size = 0;
@@ -502,4 +502,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject 
*Py_UNUSED(ignored))
 {
     return _blake2_blake2b_hexdigest_impl((Blake2Object *)self);
 }
-/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=bf30e70c312718cb input=a9049054013a1b77]*/
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
index 2710f65a840db9..64879097ac753a 100644
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -205,7 +205,7 @@ zlib_decompress(PyObject *module, PyObject *const *args, 
Py_ssize_t nargs, PyObj
 PyDoc_STRVAR(zlib_compressobj__doc__,
 "compressobj($module, /, level=Z_DEFAULT_COMPRESSION, method=DEFLATED,\n"
 "            wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL,\n"
-"            strategy=Z_DEFAULT_STRATEGY, zdict=None)\n"
+"            strategy=Z_DEFAULT_STRATEGY, zdict=<unrepresentable>)\n"
 "--\n"
 "\n"
 "Return a compressor object.\n"
@@ -1121,4 +1121,4 @@ zlib_crc32(PyObject *module, PyObject *const *args, 
Py_ssize_t nargs)
 #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
     #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
 #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
-/*[clinic end generated code: output=33938c7613a8c1c7 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=3611ce90fe05accb input=a9049054013a1b77]*/
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index bb9ef0e6da6c77..31b2d28200c4ab 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -3057,25 +3057,22 @@ class path_t_converter(CConverter):
     type = "path_t"
     impl_by_reference = True
     parse_by_reference = True
+    default_type = ()
+    c_init_default = "<placeholder>"  # overridden in pre_render(()
 
     converter = 'path_converter'
 
     def converter_init(self, *, allow_fd=False, make_wide=None,
                        nonstrict=False, nullable=False,
                        suppress_value_error=False):
-        # right now path_t doesn't support default values.
-        # to support a default value, you'll need to override initialize().
-        if self.default not in (unspecified, None):
-            fail("Can't specify a default to the path_t converter!")
-
-        if self.c_default not in (None, 'Py_None'):
-            raise RuntimeError("Can't specify a c_default to the path_t 
converter!")
 
         self.nullable = nullable
         self.nonstrict = nonstrict
         self.make_wide = make_wide
         self.suppress_value_error = suppress_value_error
         self.allow_fd = allow_fd
+        if nullable:
+            self.default_type = NoneType
 
     def pre_render(self):
         def strify(value):
@@ -3110,6 +3107,8 @@ class path_t_converter(CConverter):
 
 class dir_fd_converter(CConverter):
     type = 'int'
+    default_type = NoneType
+    c_init_default = 'DEFAULT_DIR_FD'
 
     def converter_init(self, requires=None):
         if self.default in (unspecified, None):
@@ -3119,6 +3118,9 @@ class dir_fd_converter(CConverter):
         else:
             self.converter = 'dir_fd_converter'
 
+    def c_default_init(self):
+        self.c_default = 'DEFAULT_DIR_FD'
+
 class uid_t_converter(CConverter):
     type = "uid_t"
     converter = '_Py_Uid_Converter'
@@ -3199,7 +3201,7 @@ class confname_converter(CConverter):
         """, argname=argname, converter=self.converter, table=self.table)
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=d2759f2332cd39b3]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=d58f18bdf3bd3565]*/
 
 /*[clinic input]
 
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index cb360f261608bd..5b6b0c5cac864a 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -556,7 +556,7 @@ zlib.compressobj
     strategy: int(c_default="Z_DEFAULT_STRATEGY") = Z_DEFAULT_STRATEGY
         Used to tune the compression algorithm.  Possible values are
         Z_DEFAULT_STRATEGY, Z_FILTERED, and Z_HUFFMAN_ONLY.
-    zdict: Py_buffer = None
+    zdict: Py_buffer = NULL
         The predefined compression dictionary - a sequence of bytes
         containing subsequences that are likely to occur in the input data.
 
@@ -566,7 +566,7 @@ Return a compressor object.
 static PyObject *
 zlib_compressobj_impl(PyObject *module, int level, int method, int wbits,
                       int memLevel, int strategy, Py_buffer *zdict)
-/*[clinic end generated code: output=8b5bed9c8fc3814d input=2fa3d026f90ab8d5]*/
+/*[clinic end generated code: output=8b5bed9c8fc3814d input=1a6f61d8a8885c0d]*/
 {
     zlibstate *state = get_zlib_state(module);
     if (zdict->buf != NULL && (size_t)zdict->len > UINT_MAX) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3835b8d462a10d..53f219eb185d77 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -87,14 +87,12 @@ class Py_UCS4_converter(CConverter):
     type = 'Py_UCS4'
     converter = 'convert_uc'
 
-    def converter_init(self):
-        if self.default is not unspecified:
-            self.c_default = ascii(self.default)
-            if len(self.c_default) > 4 or self.c_default[0] != "'":
-                self.c_default = hex(ord(self.default))
+    def c_default_init(self):
+        import libclinic
+        self.c_default = libclinic.c_unichar_repr(self.default)
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=88f5dd06cd8e7a61]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=22f057b68fd9a65a]*/
 
 /* --- Globals ------------------------------------------------------------
 
diff --git a/Tools/c-analyzer/cpython/_parser.py 
b/Tools/c-analyzer/cpython/_parser.py
index 2d6726faf7757a..f5dcd5c76c55f2 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -333,6 +333,7 @@ def format_tsv_lines(lines):
     _abs('Modules/_ssl_data_300.h'): (80_000, 10_000),
     _abs('Modules/_ssl_data_111.h'): (80_000, 10_000),
     _abs('Modules/cjkcodecs/mappings_*.h'): (160_000, 2_000),
+    _abs('Modules/clinic/_testclinic.c.h'): (120_000, 5_000),
     _abs('Modules/unicodedata_db.h'): (180_000, 3_000),
     _abs('Modules/unicodename_db.h'): (1_200_000, 15_000),
     _abs('Objects/unicodetype_db.h'): (240_000, 3_000),
diff --git a/Tools/clinic/libclinic/__init__.py 
b/Tools/clinic/libclinic/__init__.py
index 7c5cede2396677..742f1448146a0f 100644
--- a/Tools/clinic/libclinic/__init__.py
+++ b/Tools/clinic/libclinic/__init__.py
@@ -7,7 +7,9 @@
 )
 from .formatting import (
     SIG_END_MARKER,
-    c_repr,
+    c_str_repr,
+    c_bytes_repr,
+    c_unichar_repr,
     docstring_for_c_string,
     format_escape,
     indent_all_lines,
@@ -26,7 +28,7 @@
 from .utils import (
     FormatCounterFormatter,
     NULL,
-    Null,
+    NullType,
     Sentinels,
     VersionTuple,
     compute_checksum,
@@ -45,7 +47,9 @@
 
     # Formatting helpers
     "SIG_END_MARKER",
-    "c_repr",
+    "c_str_repr",
+    "c_bytes_repr",
+    "c_unichar_repr",
     "docstring_for_c_string",
     "format_escape",
     "indent_all_lines",
@@ -64,7 +68,7 @@
     # Utility functions
     "FormatCounterFormatter",
     "NULL",
-    "Null",
+    "NullType",
     "Sentinels",
     "VersionTuple",
     "compute_checksum",
diff --git a/Tools/clinic/libclinic/clanguage.py 
b/Tools/clinic/libclinic/clanguage.py
index 341667d2f0bff9..7f02c7790f015a 100644
--- a/Tools/clinic/libclinic/clanguage.py
+++ b/Tools/clinic/libclinic/clanguage.py
@@ -101,7 +101,7 @@ def compiler_deprecated_warning(
         code = self.COMPILER_DEPRECATION_WARNING_PROTOTYPE.format(
             major=minversion[0],
             minor=minversion[1],
-            message=libclinic.c_repr(message),
+            message=libclinic.c_str_repr(message),
         )
         return libclinic.normalize_snippet(code)
 
diff --git a/Tools/clinic/libclinic/converter.py 
b/Tools/clinic/libclinic/converter.py
index 2c93dda3541030..3d375dd3fdd70d 100644
--- a/Tools/clinic/libclinic/converter.py
+++ b/Tools/clinic/libclinic/converter.py
@@ -6,7 +6,7 @@
 
 import libclinic
 from libclinic import fail
-from libclinic import Sentinels, unspecified, unknown
+from libclinic import Sentinels, unspecified, unknown, NULL
 from libclinic.codegen import CRenderData, Include, TemplateDict
 from libclinic.function import Function, Parameter
 
@@ -83,9 +83,9 @@ class CConverter(metaclass=CConverterAutoRegister):
     # at runtime).
     default: object = unspecified
 
-    # If not None, default must be isinstance() of this type.
+    # default must be isinstance() of this type.
     # (You can also specify a tuple of types.)
-    default_type: bltns.type[object] | tuple[bltns.type[object], ...] | None = 
None
+    default_type: bltns.type[object] | tuple[bltns.type[object], ...] = object
 
     # "default" converted into a C value, as a string.
     # Or None if there is no default.
@@ -95,6 +95,13 @@ class CConverter(metaclass=CConverterAutoRegister):
     # Or None if there is no default.
     py_default: str | None = None
 
+    # The default value used to initialize the C variable when
+    # there is no default.
+    #
+    # Every non-abstract subclass with non-trivial cleanup() should supply
+    # a valid value.
+    c_init_default: str = ''
+
     # The default value used to initialize the C variable when
     # there is no default, but not specifying a default may
     # result in an "uninitialized variable" warning.  This can
@@ -105,7 +112,7 @@ class CConverter(metaclass=CConverterAutoRegister):
     #
     # This value is specified as a string.
     # Every non-abstract subclass should supply a valid value.
-    c_ignored_default: str = 'NULL'
+    c_ignored_default: str = ''
 
     # If true, wrap with Py_UNUSED.
     unused = False
@@ -182,9 +189,25 @@ def __init__(self,
         self.unused = unused
         self._includes: list[Include] = []
 
+        if c_default:
+            self.c_default = c_default
+        if py_default:
+            self.py_default = py_default
+
+        if annotation is not unspecified:
+            fail("The 'annotation' parameter is not currently permitted.")
+
+        # Make sure not to set self.function until after converter_init() has been called.
+        # This prevents you from caching information
+        # about the function in converter_init().
+        # (That breaks if we get cloned.)
+        self.converter_init(**kwargs)
+
         if default is not unspecified:
-            if (self.default_type
-                and default is not unknown
+            if self.default_type == ():
+                conv_name = self.__class__.__name__.removesuffix('_converter')
+                fail(f"A '{conv_name}' parameter cannot be marked optional.")
+            if (default is not unknown
                 and not isinstance(default, self.default_type)
             ):
                 if isinstance(self.default_type, type):
@@ -197,19 +220,38 @@ def __init__(self,
                      f"{name!r} is not of type {types_str!r}")
             self.default = default
 
-        if c_default:
-            self.c_default = c_default
-        if py_default:
-            self.py_default = py_default
-
-        if annotation is not unspecified:
-            fail("The 'annotation' parameter is not currently permitted.")
+        if not self.c_default:
+            if default is unspecified:
+                if self.c_init_default:
+                    self.c_default = self.c_init_default
+            elif default is NULL:
+                self.c_default = self.c_ignored_default or self.c_init_default
+                if not self.c_default:
+                    cls_name = self.__class__.__name__
+                    fail(f"{cls_name}: c_default is required for "
+                         f"default value NULL")
+            else:
+                assert default is not unknown
+                self.c_default_init()
+                if not self.c_default:
+                    if default is None:
+                        self.c_default = self.c_init_default
+                        if not self.c_default:
+                            cls_name = self.__class__.__name__
+                            fail(f"{cls_name}: c_default is required for "
+                                 f"default value None")
+                    elif isinstance(default, str):
+                        self.c_default = libclinic.c_str_repr(default)
+                    elif isinstance(default, bytes):
+                        self.c_default = libclinic.c_bytes_repr(default)
+                    elif isinstance(default, (int, float)):
+                        self.c_default = repr(default)
+                    else:
+                        cls_name = self.__class__.__name__
+                        fail(f"{cls_name}: c_default is required for "
+                             f"default value {default!r}")
+                        fail(f"Unsupported default value {default!r}.")
 
-        # Make sure not to set self.function until after converter_init() has been called.
-        # This prevents you from caching information
-        # about the function in converter_init().
-        # (That breaks if we get cloned.)
-        self.converter_init(**kwargs)
         self.function = function
 
     # Add a custom __getattr__ method to improve the error message
@@ -233,6 +275,9 @@ def __getattr__(self, attr):
     def converter_init(self) -> None:
         pass
 
+    def c_default_init(self) -> None:
+        return
+
     def is_optional(self) -> bool:
         return (self.default is not unspecified)
 
@@ -324,7 +369,7 @@ def parse_argument(self, args: list[str]) -> None:
             args.append(self.converter)
 
         if self.encoding:
-            args.append(libclinic.c_repr(self.encoding))
+            args.append(libclinic.c_str_repr(self.encoding))
         elif self.subclass_of:
             args.append(self.subclass_of)
 
@@ -371,7 +416,7 @@ def declaration(self, *, in_parser: bool = False) -> str:
         declaration = [self.simple_declaration(in_parser=True)]
         default = self.c_default
         if not default and self.parameter.group:
-            default = self.c_ignored_default
+            default = self.c_ignored_default or self.c_init_default
         if default:
             declaration.append(" = ")
             declaration.append(default)
diff --git a/Tools/clinic/libclinic/converters.py b/Tools/clinic/libclinic/converters.py
index 8c92b766ba0862..64fc1e95007516 100644
--- a/Tools/clinic/libclinic/converters.py
+++ b/Tools/clinic/libclinic/converters.py
@@ -4,7 +4,7 @@
 from types import NoneType
 from typing import Any
 
-from libclinic import fail, Null, unspecified, unknown
+from libclinic import fail, NullType, unspecified, NULL, c_bytes_repr, c_unichar_repr
 from libclinic.function import (
     Function, Parameter,
     CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW,
@@ -18,6 +18,9 @@
 
 
 class BaseUnsignedIntConverter(CConverter):
+    bitwise = False
+    default_type = int
+    c_ignored_default = '0'
 
     def use_converter(self) -> None:
         if self.converter:
@@ -74,12 +77,13 @@ class bool_converter(CConverter):
     def converter_init(self, *, accept: TypeSet = {object}) -> None:
         if accept == {int}:
             self.format_unit = 'i'
+            self.default_type = int  # type: ignore[assignment]
         elif accept != {object}:
             fail(f"bool_converter: illegal 'accept' argument {accept!r}")
-        if self.default is not unspecified and self.default is not unknown:
-            self.default = bool(self.default)
-            if self.c_default in {'Py_True', 'Py_False'}:
-                self.c_default = str(int(self.default))
+
+    def c_default_init(self) -> None:
+        assert isinstance(self.default, int)
+        self.c_default = str(int(self.default))
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'i':
@@ -107,6 +111,7 @@ class defining_class_converter(CConverter):
     this is the default converter used for the defining class.
     """
     type = 'PyTypeObject *'
+    default_type = ()
     format_unit = ''
     show_in_signature = False
     specified_type: str | None = None
@@ -123,7 +128,7 @@ def set_template_dict(self, template_dict: TemplateDict) -> None:
 
 class char_converter(CConverter):
     type = 'char'
-    default_type = (bytes, bytearray)
+    default_type = bytes
     format_unit = 'c'
     c_ignored_default = "'\0'"
 
@@ -132,9 +137,18 @@ def converter_init(self) -> None:
             if len(self.default) != 1:
                 fail(f"char_converter: illegal default value {self.default!r}")
 
-            self.c_default = repr(bytes(self.default))[1:]
-            if self.c_default == '"\'"':
-                self.c_default = r"'\''"
+    def c_default_init(self) -> None:
+        default = self.default
+        assert isinstance(default, bytes)
+        if default == b"'":
+            self.c_default = r"'\''"
+        elif default == b'"':
+            self.c_default = r"""'"'"""
+        elif default == b'\0':
+            self.c_default = r"'\0'"
+        else:
+            r = c_bytes_repr(default)[1:-1]
+            self.c_default = "'" + r + "'"
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'c':
@@ -174,7 +188,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 @add_legacy_c_converter('B', bitwise=True)
 class unsigned_char_converter(CConverter):
     type = 'unsigned char'
-    default_type = int
     format_unit = 'b'
     c_ignored_default = "'\0'"
 
@@ -261,8 +274,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class unsigned_short_converter(BaseUnsignedIntConverter):
     type = 'unsigned short'
-    default_type = int
-    c_ignored_default = "0"
 
     def converter_init(self, *, bitwise: bool = False) -> None:
         if bitwise:
@@ -294,11 +305,19 @@ def converter_init(
     ) -> None:
         if accept == {str}:
             self.format_unit = 'C'
+            self.default_type = str  # type: ignore[assignment]
+            if isinstance(self.default, str):
+                if len(self.default) != 1:
+                    fail(f"int_converter: illegal default value {self.default!r}")
         elif accept != {int}:
             fail(f"int_converter: illegal 'accept' argument {accept!r}")
         if type is not None:
             self.type = type
 
+    def c_default_init(self) -> None:
+        if isinstance(self.default, str):
+            self.c_default = c_unichar_repr(self.default)
+
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'i':
             return self.format_code("""
@@ -332,8 +351,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class unsigned_int_converter(BaseUnsignedIntConverter):
     type = 'unsigned int'
-    default_type = int
-    c_ignored_default = "0"
 
     def converter_init(self, *, bitwise: bool = False) -> None:
         if bitwise:
@@ -373,8 +390,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class unsigned_long_converter(BaseUnsignedIntConverter):
     type = 'unsigned long'
-    default_type = int
-    c_ignored_default = "0"
 
     def converter_init(self, *, bitwise: bool = False) -> None:
         if bitwise:
@@ -417,8 +432,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class unsigned_long_long_converter(BaseUnsignedIntConverter):
     type = 'unsigned long long'
-    default_type = int
-    c_ignored_default = "0"
 
     def converter_init(self, *, bitwise: bool = False) -> None:
         if bitwise:
@@ -443,12 +456,13 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class Py_ssize_t_converter(CConverter):
     type = 'Py_ssize_t'
+    default_type = (int, NoneType)
     c_ignored_default = "0"
 
     def converter_init(self, *, accept: TypeSet = {int}) -> None:
         if accept == {int}:
             self.format_unit = 'n'
-            self.default_type = int
+            self.default_type = int  # type: ignore[assignment]
         elif accept == {int, NoneType}:
             self.converter = '_Py_convert_optional_to_ssize_t'
         else:
@@ -505,10 +519,13 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class slice_index_converter(CConverter):
     type = 'Py_ssize_t'
+    default_type = (int, NoneType)
+    c_ignored_default = "0"
 
     def converter_init(self, *, accept: TypeSet = {int, NoneType}) -> None:
         if accept == {int}:
             self.converter = '_PyEval_SliceIndexNotNone'
+            self.default_type = int  # type: ignore[assignment]
             self.nullable = False
         elif accept == {int, NoneType}:
             self.converter = '_PyEval_SliceIndex'
@@ -558,7 +575,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 class size_t_converter(BaseUnsignedIntConverter):
     type = 'size_t'
     converter = '_PyLong_Size_t_Converter'
-    c_ignored_default = "0"
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'n':
@@ -677,6 +693,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 class object_converter(CConverter):
     type = 'PyObject *'
     format_unit = 'O'
+    c_ignored_default = 'NULL'
 
     def converter_init(
             self, *,
@@ -696,6 +713,10 @@ def converter_init(
         if type is not None:
             self.type = type
 
+    def c_default_init(self) -> None:
+        default = self.default
+        if default is None or isinstance(default, bool):
+            self.c_default = "Py_" + repr(default)
 
 #
 # We define three conventions for buffer types in the 'accept' argument:
@@ -725,8 +746,9 @@ def str_converter_key(
 
 class str_converter(CConverter):
     type = 'const char *'
-    default_type = (str, Null, NoneType)
+    default_type = (str, bytes, NullType, NoneType)
     format_unit = 's'
+    c_ignored_default = 'NULL'
 
     def converter_init(
             self,
@@ -744,14 +766,16 @@ def converter_init(
         self.format_unit = format_unit
         self.length = bool(zeroes)
         if encoding:
-            if self.default not in (Null, None, unspecified):
+            if self.default not in (NULL, None, unspecified):
                 fail("str_converter: Argument Clinic doesn't support default values for encoded strings")
             self.encoding = encoding
             self.type = 'char *'
             # sorry, clinic can't support preallocated buffers
             # for es# and et#
             self.c_default = "NULL"
-        if NoneType in accept and self.c_default == "Py_None":
+
+    def c_default_init(self) -> None:
+        if self.default is None:
             self.c_default = "NULL"
 
     def post_parsing(self) -> str:
@@ -864,6 +888,7 @@ class PyBytesObject_converter(CConverter):
     type = 'PyBytesObject *'
     format_unit = 'S'
     # accept = {bytes}
+    c_ignored_default = 'NULL'
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'S':
@@ -884,6 +909,7 @@ class PyByteArrayObject_converter(CConverter):
     type = 'PyByteArrayObject *'
     format_unit = 'Y'
     # accept = {bytearray}
+    c_ignored_default = 'NULL'
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'Y':
@@ -902,8 +928,9 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class unicode_converter(CConverter):
     type = 'PyObject *'
-    default_type = (str, Null, NoneType)
+    default_type = (str, NullType, NoneType)
     format_unit = 'U'
+    c_ignored_default = 'NULL'
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
         if self.format_unit == 'U':
@@ -922,11 +949,11 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 
 class _unicode_fs_converter_base(CConverter):
     type = 'PyObject *'
+    default_type = NullType
+    c_init_default = 'NULL'
 
-    def converter_init(self) -> None:
-        if self.default is not unspecified:
-            fail(f"{self.__class__.__name__} does not support default values")
-        self.c_default = 'NULL'
+    def c_default_init(self) -> None:
+        fail(f"{self.__class__.__name__} does not support default values")
 
     def cleanup(self) -> str:
         return f"Py_XDECREF({self.parser_name});"
@@ -946,7 +973,8 @@ class unicode_fs_decoded_converter(_unicode_fs_converter_base):
 @add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True)
 class Py_UNICODE_converter(CConverter):
     type = 'const wchar_t *'
-    default_type = (str, Null, NoneType)
+    default_type = (str, NullType, NoneType)
+    c_ignored_default = 'NULL'
 
     def converter_init(
             self, *,
@@ -962,6 +990,7 @@ def converter_init(
             self.accept = accept
             if accept == {str}:
                 self.converter = '_PyUnicode_WideCharString_Converter'
+                self.default_type = (str, NullType)  # type: ignore[assignment]
             elif accept == {str, NoneType}:
                 self.converter = '_PyUnicode_WideCharString_Opt_Converter'
             else:
@@ -1017,28 +1046,34 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st
 @add_legacy_c_converter('w*', accept={rwbuffer})
 class Py_buffer_converter(CConverter):
     type = 'Py_buffer'
+    default_type = (str, bytes, NullType, NoneType)
     format_unit = 'y*'
     impl_by_reference = True
-    c_ignored_default = "{NULL, NULL}"
+    c_init_default = "{NULL, NULL}"
 
     def converter_init(self, *, accept: TypeSet = {buffer}) -> None:
-        if self.default not in (unspecified, None):
-            fail("The only legal default value for Py_buffer is None.")
-
-        self.c_default = self.c_ignored_default
-
         if accept == {str, buffer, NoneType}:
-            format_unit = 'z*'
+            self.format_unit = 'z*'
+            self.default_type = (str, bytes, NullType, NoneType)
         elif accept == {str, buffer}:
-            format_unit = 's*'
+            self.format_unit = 's*'
+            self.default_type = (str, bytes, NullType)  # type: ignore[assignment]
         elif accept == {buffer}:
-            format_unit = 'y*'
+            self.format_unit = 'y*'
+            self.default_type = (bytes, NullType)  # type: ignore[assignment]
         elif accept == {rwbuffer}:
-            format_unit = 'w*'
+            self.format_unit = 'w*'
+            self.default_type = NullType  # type: ignore[assignment]
         else:
             fail("Py_buffer_converter: illegal combination of arguments")
 
-        self.format_unit = format_unit
+    def c_default_init(self) -> None:
+        default = self.default
+        if isinstance(default, bytes):
+            self.c_default = f'{{.buf = {c_bytes_repr(default)}, .obj = NULL, .len = {len(default)}}}'
+        elif isinstance(default, str):
+            default = default.encode()
+            self.c_default = f'{{.buf = {c_bytes_repr(default)}, .obj = NULL, .len = {len(default)}}}'
 
     def cleanup(self) -> str:
         name = self.name
@@ -1119,6 +1154,7 @@ class self_converter(CConverter):
     this is the default converter used for "self".
     """
     type: str | None = None
+    default_type = ()
     format_unit = ''
     specified_type: str | None = None
 
@@ -1233,6 +1269,7 @@ def use_pyobject_self(self, func: Function) -> bool:
 # Converters for var-positional parameter.
 
 class VarPosCConverter(CConverter):
+    default_type = ()
     format_unit = ''
 
     def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
@@ -1245,8 +1282,7 @@ def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int,
 
 class varpos_tuple_converter(VarPosCConverter):
     type = 'PyObject *'
-    format_unit = ''
-    c_default = 'NULL'
+    c_init_default = 'NULL'
 
     def cleanup(self) -> str:
         return f"""Py_XDECREF({self.parser_name});\n"""
@@ -1304,7 +1340,6 @@ def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int,
 class varpos_array_converter(VarPosCConverter):
     type = 'PyObject * const *'
     length = True
-    c_ignored_default = ''
 
     def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int,
                      fastcall: bool, limited_capi: bool) -> str:
diff --git a/Tools/clinic/libclinic/dsl_parser.py b/Tools/clinic/libclinic/dsl_parser.py
index eca41531f7c8e9..6ead9bf2022833 100644
--- a/Tools/clinic/libclinic/dsl_parser.py
+++ b/Tools/clinic/libclinic/dsl_parser.py
@@ -7,7 +7,7 @@
 import shlex
 import sys
 from collections.abc import Callable
-from types import FunctionType, NoneType
+from types import FunctionType
 from typing import TYPE_CHECKING, Any, NamedTuple
 
 import libclinic
@@ -914,16 +914,17 @@ def parse_parameter(self, line: str) -> None:
             name = 'varpos_' + name
 
         value: object
+        has_c_default = 'c_default' in kwargs
         if not function_args.defaults:
-            if is_vararg:
-                value = NULL
-            else:
-                if self.parameter_state is ParamState.OPTIONAL:
-                    fail(f"Can't have a parameter without a default ({parameter_name!r}) "
-                          "after a parameter with a default!")
-                value = unspecified
+            value = unspecified
+            if (not is_vararg
+                    and self.parameter_state is ParamState.OPTIONAL):
+                fail(f"Can't have a parameter without a default ({parameter_name!r}) "
+                     "after a parameter with a default!")
             if 'py_default' in kwargs:
                 fail("You can't specify py_default without specifying a default value!")
+            if has_c_default:
+                fail("You can't specify c_default without specifying a default value!")
         else:
             expr = function_args.defaults[0]
             default = ast_input[expr.col_offset: expr.end_col_offset].strip()
@@ -932,7 +933,7 @@ def parse_parameter(self, line: str) -> None:
                 self.parameter_state = ParamState.OPTIONAL
             bad = False
             try:
-                if 'c_default' not in kwargs:
+                if not has_c_default:
                     # we can only represent very simple data values in C.
                     # detect whether default is okay, via a denylist
                     # of disallowed ast nodes.
@@ -978,18 +979,15 @@ def bad_node(self, node: ast.AST) -> None:
                     fail(f"Unsupported expression as default value: {default!r}")
 
                 # mild hack: explicitly support NULL as a default value
-                c_default: str | None
                 if isinstance(expr, ast.Name) and expr.id == 'NULL':
                     value = NULL
                     py_default = '<unrepresentable>'
-                    c_default = "NULL"
                 elif (isinstance(expr, ast.BinOp) or
                     (isinstance(expr, ast.UnaryOp) and
                      not (isinstance(expr.operand, ast.Constant) and
                           type(expr.operand.value) in {int, float, complex})
                     )):
-                    c_default = kwargs.get("c_default")
-                    if not (isinstance(c_default, str) and c_default):
+                    if not has_c_default:
                         fail(f"When you specify an expression ({default!r}) "
                              f"as your default value, "
                              f"you MUST specify a valid c_default.",
@@ -1008,8 +1006,7 @@ def bad_node(self, node: ast.AST) -> None:
                     a.append(n.id)
                     py_default = ".".join(reversed(a))
 
-                    c_default = kwargs.get("c_default")
-                    if not (isinstance(c_default, str) and c_default):
+                    if not has_c_default:
                         fail(f"When you specify a named constant ({py_default!r}) "
                              "as your default value, "
                              "you MUST specify a valid c_default.")
@@ -1021,23 +1018,15 @@ def bad_node(self, node: ast.AST) -> None:
                 else:
                     value = ast.literal_eval(expr)
                     py_default = repr(value)
-                    if isinstance(value, (bool, NoneType)):
-                        c_default = "Py_" + py_default
-                    elif isinstance(value, str):
-                        c_default = libclinic.c_repr(value)
-                    else:
-                        c_default = py_default
 
             except (ValueError, AttributeError):
                 value = unknown
-                c_default = kwargs.get("c_default")
                 py_default = default
-                if not (isinstance(c_default, str) and c_default):
+                if not has_c_default:
                     fail("When you specify a named constant "
                          f"({py_default!r}) as your default value, "
                          "you MUST specify a valid c_default.")
 
-            kwargs.setdefault('c_default', c_default)
             kwargs.setdefault('py_default', py_default)
 
         dict = legacy_converters if legacy else converters
@@ -1058,12 +1047,10 @@ def bad_node(self, node: ast.AST) -> None:
 
         if isinstance(converter, self_converter):
             if len(self.function.parameters) == 1:
-                if self.parameter_state is not ParamState.REQUIRED:
-                    fail("A 'self' parameter cannot be marked optional.")
-                if value is not unspecified:
-                    fail("A 'self' parameter cannot have a default value.")
                 if self.group:
                     fail("A 'self' parameter cannot be in an optional group.")
+                assert self.parameter_state is ParamState.REQUIRED
+                assert value is unspecified
                 kind = inspect.Parameter.POSITIONAL_ONLY
                 self.parameter_state = ParamState.START
                 self.function.parameters.clear()
@@ -1074,14 +1061,12 @@ def bad_node(self, node: ast.AST) -> None:
         if isinstance(converter, defining_class_converter):
             _lp = len(self.function.parameters)
             if _lp == 1:
-                if self.parameter_state is not ParamState.REQUIRED:
-                    fail("A 'defining_class' parameter cannot be marked optional.")
-                if value is not unspecified:
-                    fail("A 'defining_class' parameter cannot have a default value.")
                 if self.group:
                     fail("A 'defining_class' parameter cannot be in an optional group.")
                 if self.function.cls is None:
                     fail("A 'defining_class' parameter cannot be defined at module level.")
+                assert self.parameter_state is ParamState.REQUIRED
+                assert value is unspecified
                 kind = inspect.Parameter.POSITIONAL_ONLY
             else:
                 fail("A 'defining_class' parameter, if specified, must either "
diff --git a/Tools/clinic/libclinic/formatting.py b/Tools/clinic/libclinic/formatting.py
index 873ece6210017a..264327818c1d19 100644
--- a/Tools/clinic/libclinic/formatting.py
+++ b/Tools/clinic/libclinic/formatting.py
@@ -39,8 +39,55 @@ def _quoted_for_c_string(text: str) -> str:
     return text
 
 
-def c_repr(text: str) -> str:
-    return '"' + text + '"'
+# Use octals, because \x... in C has arbitrary number of hexadecimal digits.
+_c_repr = [chr(i) if 32 <= i < 127 else fr'\{i:03o}' for i in range(256)]
+_c_repr[ord('"')] = r'\"'
+_c_repr[ord('\\')] = r'\\'
+_c_repr[ord('\a')] = r'\a'
+_c_repr[ord('\b')] = r'\b'
+_c_repr[ord('\f')] = r'\f'
+_c_repr[ord('\n')] = r'\n'
+_c_repr[ord('\r')] = r'\r'
+_c_repr[ord('\t')] = r'\t'
+_c_repr[ord('\v')] = r'\v'
+
+def _break_trigraphs(s: str) -> str:
+    # Prevent trigraphs from being interpreted inside string literals.
+    if '??' in s:
+        s = s.replace('??', r'?\?')
+        s = s.replace(r'\??', r'\?\?')
+    # Also Argument Clinic does not like comment-like sequences
+    # in string literals.
+    s = s.replace(r'/*', r'/\*')
+    s = s.replace(r'*/', r'*\/')
+    return s
+
+def c_bytes_repr(data: bytes) -> str:
+    r = ''.join(_c_repr[i] for i in data)
+    r = _break_trigraphs(r)
+    return '"' + r + '"'
+
+def c_str_repr(text: str) -> str:
+    r = ''.join(_c_repr[i] if i < 0x80
+                else fr'\u{i:04x}' if i < 0x10000
+                else fr'\U{i:08x}'
+                for i in map(ord, text))
+    r = _break_trigraphs(r)
+    return '"' + r + '"'
+
+def c_unichar_repr(char: str) -> str:
+    if char == "'":
+        return r"'\''"
+    if char == '"':
+        return """'"'"""
+    if char == '\0':
+        return '0'
+    i = ord(char)
+    if i < 0x80:
+        r = _c_repr[i]
+        if not r.startswith((r'\0', r'\1')):
+            return "'" + r + "'"
+    return f'0x{i:02x}'
 
 
 def wrapped_c_string_literal(
@@ -58,8 +105,8 @@ def wrapped_c_string_literal(
         drop_whitespace=False,
         break_on_hyphens=False,
     )
-    separator = c_repr(suffix + "\n" + subsequent_indent * " ")
-    return initial_indent * " " + c_repr(separator.join(wrapped))
+    separator = '"' + suffix + "\n" + subsequent_indent * " " + '"'
+    return initial_indent * " " + '"' + separator.join(wrapped) + '"'
 
 
 def _add_prefix_and_suffix(text: str, *, prefix: str = "", suffix: str = "") -> str:
diff --git a/Tools/clinic/libclinic/utils.py b/Tools/clinic/libclinic/utils.py
index 17e8f35be73bf4..3df64f270dd074 100644
--- a/Tools/clinic/libclinic/utils.py
+++ b/Tools/clinic/libclinic/utils.py
@@ -85,9 +85,9 @@ def __repr__(self) -> str:
 
 
 # This one needs to be a distinct class, unlike the other two
-class Null:
+class NullType:
     def __repr__(self) -> str:
         return '<Null>'
 
 
-NULL = Null()
+NULL = NullType()

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

[Python-checkins] [3.14] gh-144545: Improve handling of default values in Argument Clinic (GH-146016) (GH-146052)

Reply via email to