[Python-checkins] gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() (GH-144024)

serhiy-storchaka Mon, 26 Jan 2026 10:12:10 -0800

https://github.com/python/cpython/commit/7febbe6b600e63544d5e7000cf377eeead858a39
commit: 7febbe6b600e63544d5e7000cf377eeead858a39
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-01-26T20:11:40+02:00
summary:


gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() 
(GH-144024)

files:
A Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
M Doc/library/base64.rst
M Doc/library/binascii.rst
M Doc/whatsnew/3.15.rst
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject_generated.h
M Lib/base64.py
M Lib/test/test_base64.py
M Lib/test/test_binascii.py
M Modules/binascii.c
M Modules/clinic/binascii.c.h

diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 64d66fcf6bd50a..65b8aeaef8e939 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -73,6 +73,7 @@ POST request.
 
 
 .. function:: b64decode(s, altchars=None, validate=False)
+              b64decode(s, altchars=None, validate=True, *, ignorechars)
 
    Decode the Base64 encoded :term:`bytes-like object` or ASCII string
    *s* and return the decoded :class:`bytes`.
@@ -84,11 +85,17 @@ POST request.
    A :exc:`binascii.Error` exception is raised
    if *s* is incorrectly padded.
 
-   If *validate* is false (the default), characters that are neither
+   If *ignorechars* is specified, it should be a :term:`bytes-like object`
+   containing characters to ignore from the input when *validate* is true.
+   The default value of *validate* is ``True`` if *ignorechars* is specified,
+   ``False`` otherwise.
+
+   If *validate* is false, characters that are neither
    in the normal base-64 alphabet nor the alternative alphabet are
    discarded prior to the padding check, but the ``+`` and ``/`` characters
    keep their meaning if they are not in *altchars* (they will be discarded
    in future Python versions).
+
    If *validate* is true, these non-alphabet characters in the input
    result in a :exc:`binascii.Error`.
 
@@ -99,6 +106,10 @@ POST request.
       is now deprecated.
 
 
+   .. versionchanged:: next
+      Added the *ignorechars* parameter.
+
+
 .. function:: standard_b64encode(s)
 
    Encode :term:`bytes-like object` *s* using the standard Base64 alphabet
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index eaf755711bc292..d9f0baedec85f2 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -49,10 +49,16 @@ The :mod:`binascii` module defines the following functions:
 
 
 .. function:: a2b_base64(string, /, *, strict_mode=False)
+              a2b_base64(string, /, *, strict_mode=True, ignorechars)
 
    Convert a block of base64 data back to binary and return the binary data. More
    than one line may be passed at a time.
 
+   If *ignorechars* is specified, it should be a :term:`bytes-like object`
+   containing characters to ignore from the input when *strict_mode* is true.
+   The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
+   ``False`` otherwise.
+
    If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
    data will raise :exc:`binascii.Error`.
 
@@ -66,6 +72,9 @@ The :mod:`binascii` module defines the following functions:
    .. versionchanged:: 3.11
       Added the *strict_mode* parameter.
 
+   .. versionchanged:: next
+      Added the *ignorechars* parameter.
+
 
 .. function:: b2a_base64(data, *, wrapcol=0, newline=True)
 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index aec6b1ceea37cf..19c01b71f02fb6 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -444,6 +444,8 @@ base64
 * Added the *wrapcol* parameter in :func:`~base64.b64encode`.
   (Contributed by Serhiy Storchaka in :gh:`143214`.)
 
+* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
+  (Contributed by Serhiy Storchaka in :gh:`144001`.)
 
 binascii
 --------
@@ -451,6 +453,9 @@ binascii
 * Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
   (Contributed by Serhiy Storchaka in :gh:`143214`.)
 
+* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
+  (Contributed by Serhiy Storchaka in :gh:`144001`.)
+
 
 calendar
 --------
diff --git a/Include/internal/pycore_global_objects_fini_generated.h 
b/Include/internal/pycore_global_objects_fini_generated.h
index 4a5b2a925413bf..fc297a2933a786 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1797,6 +1797,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignorechars));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(implieslink));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib));
diff --git a/Include/internal/pycore_global_strings.h 
b/Include/internal/pycore_global_strings.h
index 7c2f44ef6dbe7a..563ccd7cf6d3f4 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -520,6 +520,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(ident)
         STRUCT_FOR_ID(identity_hint)
         STRUCT_FOR_ID(ignore)
+        STRUCT_FOR_ID(ignorechars)
         STRUCT_FOR_ID(imag)
         STRUCT_FOR_ID(implieslink)
         STRUCT_FOR_ID(importlib)
diff --git a/Include/internal/pycore_runtime_init_generated.h 
b/Include/internal/pycore_runtime_init_generated.h
index 6e7bad986dbeda..ba7c0e68434517 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1795,6 +1795,7 @@ extern "C" {
     INIT_ID(ident), \
     INIT_ID(identity_hint), \
     INIT_ID(ignore), \
+    INIT_ID(ignorechars), \
     INIT_ID(imag), \
     INIT_ID(implieslink), \
     INIT_ID(importlib), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h 
b/Include/internal/pycore_unicodeobject_generated.h
index 660115931da0a0..44063794293990 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1860,6 +1860,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) 
{
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(ignorechars);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(imag);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/base64.py b/Lib/base64.py
index 6e0da16b23ce99..6e9d24f0649320 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -26,6 +26,8 @@
     ]
 
 
+_NOT_SPECIFIED = ['NOT SPECIFIED']
+
 bytes_types = (bytes, bytearray)  # Types acceptable as binary data
 
 def _bytes_from_decode_data(s):
@@ -62,7 +64,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
     return encoded
 
 
-def b64decode(s, altchars=None, validate=False):
+def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, 
ignorechars=_NOT_SPECIFIED):
     """Decode the Base64 encoded bytes-like object or ASCII string s.
 
     Optional altchars must be a bytes-like object or ASCII string of length 2
@@ -72,38 +74,64 @@ def b64decode(s, altchars=None, validate=False):
     The result is returned as a bytes object.  A binascii.Error is raised if
     s is incorrectly padded.
 
-    If validate is false (the default), characters that are neither in the
-    normal base-64 alphabet nor the alternative alphabet are discarded prior
-    to the padding check.  If validate is true, these non-alphabet characters
-    in the input result in a binascii.Error.
+    If ignorechars is specified, it should be a byte string containing
+    characters to ignore from the input.  The default value of validate is
+    True if ignorechars is specified, False otherwise.
+
+    If validate is false, characters that are neither in the normal base-64
+    alphabet nor the alternative alphabet are discarded prior to the
+    padding check.  If validate is true, these non-alphabet characters in
+    the input result in a binascii.Error if they are not in ignorechars.
     For more information about the strict base64 check, see:
 
     https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
     """
     s = _bytes_from_decode_data(s)
+    if validate is _NOT_SPECIFIED:
+        validate = ignorechars is not _NOT_SPECIFIED
+    if ignorechars is _NOT_SPECIFIED:
+        ignorechars = b''
     badchar = None
+    badchar_strict = False
     if altchars is not None:
         altchars = _bytes_from_decode_data(altchars)
         if len(altchars) != 2:
             raise ValueError(f'invalid altchars: {altchars!r}')
         for b in b'+/':
             if b not in altchars and b in s:
-                badchar = b
-                break
+                if badchar is None:
+                    badchar = b
+                if not validate:
+                    break
+                if not isinstance(ignorechars, (bytes, bytearray)):
+                    ignorechars = memoryview(ignorechars).cast('B')
+                if b not in ignorechars:
+                    badchar_strict = True
+                    badchar = b
+                    break
         s = s.translate(bytes.maketrans(altchars, b'+/'))
-    result = binascii.a2b_base64(s, strict_mode=validate)
+    result = binascii.a2b_base64(s, strict_mode=validate,
+                                 ignorechars=ignorechars)
     if badchar is not None:
         import warnings
-        if validate:
+        if badchar_strict:
             warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
                           f'with altchars={altchars!r} and validate=True '
                           f'will be an error in future Python versions',
                           DeprecationWarning, stacklevel=2)
         else:
-            warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
-                          f'with altchars={altchars!r} and validate=False '
-                          f'will be discarded in future Python versions',
-                          FutureWarning, stacklevel=2)
+            ignorechars = bytes(ignorechars)
+            if ignorechars:
+                warnings.warn(f'invalid character {chr(badchar)!a} in Base64 
data '
+                              f'with altchars={altchars!r} '
+                              f'and ignorechars={ignorechars!r} '
+                              f'will be discarded in future Python versions',
+                              FutureWarning, stacklevel=2)
+            else:
+                warnings.warn(f'invalid character {chr(badchar)!a} in Base64 
data '
+                              f'with altchars={altchars!r} and validate=False '
+                              f'will be discarded in future Python versions',
+                              FutureWarning, stacklevel=2)
     return result
 
 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 6e69ece8065ea2..5f7a41f53345d2 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -303,22 +303,26 @@ def test_b64decode_padding_error(self):
 
     def test_b64decode_invalid_chars(self):
         # issue 1466065: Test some invalid characters.
-        tests = ((b'%3d==', b'\xdd'),
-                 (b'$3d==', b'\xdd'),
-                 (b'[==', b''),
-                 (b'YW]3=', b'am'),
-                 (b'3{d==', b'\xdd'),
-                 (b'3d}==', b'\xdd'),
-                 (b'@@', b''),
-                 (b'!', b''),
-                 (b"YWJj\n", b"abc"),
-                 (b'YWJj\nYWI=', b'abcab'))
+        tests = ((b'%3d==', b'\xdd', b'%$'),
+                 (b'$3d==', b'\xdd', b'%$'),
+                 (b'[==', b'', None),
+                 (b'YW]3=', b'am', b']'),
+                 (b'3{d==', b'\xdd', b'{}'),
+                 (b'3d}==', b'\xdd', b'{}'),
+                 (b'@@', b'', b'@!'),
+                 (b'!', b'', b'@!'),
+                 (b"YWJj\n", b"abc", b'\n'),
+                 (b'YWJj\nYWI=', b'abcab', b'\n'),
+                 (b'YW\nJj', b'abc', b'\n'),
+                 (b'YW\nJj', b'abc', bytearray(b'\n')),
+                 (b'YW\nJj', b'abc', memoryview(b'\n')),
+        )
         funcs = (
             base64.b64decode,
             base64.standard_b64decode,
             base64.urlsafe_b64decode,
         )
-        for bstr, res in tests:
+        for bstr, res, ignorechars in tests:
             for func in funcs:
                 with self.subTest(bstr=bstr, func=func):
                     self.assertEqual(func(bstr), res)
@@ -327,24 +331,76 @@ def test_b64decode_invalid_chars(self):
                 base64.b64decode(bstr, validate=True)
             with self.assertRaises(binascii.Error):
                 base64.b64decode(bstr.decode('ascii'), validate=True)
+            with self.assertRaises(binascii.Error):
+                # Even empty ignorechars enables the strict mode.
+                base64.b64decode(bstr, ignorechars=b'')
+            if ignorechars is not None:
+                r = base64.b64decode(bstr, ignorechars=ignorechars)
+                self.assertEqual(r, res)
+
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars='')
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars=[])
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars=None)
 
         # Normal alphabet characters will be discarded when alternative given
-        with self.assertWarns(FutureWarning):
-            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
-                             b'\xfb\xef\xbe')
-        with self.assertWarns(FutureWarning):
-            self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
-                             b'\xff\xff\xff')
-        with self.assertWarns(DeprecationWarning):
-            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', 
validate=True),
-                             b'\xfb\xef\xbe')
-        with self.assertWarns(DeprecationWarning):
-            self.assertEqual(base64.b64decode(b'////', altchars=b'-_', 
validate=True),
-                             b'\xff\xff\xff')
-        with self.assertWarns(FutureWarning):
+        discarded = ("invalid character %a in Base64 data with %s "
+                     "will be discarded in future Python versions")
+        error = ("invalid character %a in Base64 data with %s "
+                 "will be an error in future Python versions")
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_')
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('+', "altchars=b'-_' and 
validate=False"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_')
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('/', "altchars=b'-_' and 
validate=False"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_', validate=True)
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         error % ('+', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_', validate=True)
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('/', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('+', "altchars=b'-_' and 
ignorechars=b'+'"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('/', "altchars=b'-_' and 
ignorechars=b'/'"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'+')
+        self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('/', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'/')
+        self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('+', "altchars=b'-_' and validate=True"))
+
+        with self.assertWarns(FutureWarning) as cm:
             self.assertEqual(base64.urlsafe_b64decode(b'++++'), 
b'\xfb\xef\xbe')
-        with self.assertWarns(FutureWarning):
+        self.assertEqual(str(cm.warning),
+                         "invalid character '+' in URL-safe Base64 data "
+                         "will be discarded in future Python versions")
+        with self.assertWarns(FutureWarning) as cm:
             self.assertEqual(base64.urlsafe_b64decode(b'////'), 
b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         "invalid character '/' in URL-safe Base64 data "
+                         "will be discarded in future Python versions")
         with self.assertRaises(binascii.Error):
             base64.b64decode(b'+/!', altchars=b'-_')
 
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 47e1e6ab035a17..4cfc332e89bea8 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -145,16 +145,16 @@ def assertExcessPadding(data, 
non_strict_mode_expected_result: bytes):
 
         # Test excess data exceptions
         assertExcessData(b'ab==a', b'i')
-        assertExcessData(b'ab===', b'i')
-        assertExcessData(b'ab====', b'i')
-        assertExcessData(b'ab==:', b'i')
+        assertExcessPadding(b'ab===', b'i')
+        assertExcessPadding(b'ab====', b'i')
+        assertNonBase64Data(b'ab==:', b'i')
         assertExcessData(b'abc=a', b'i\xb7')
-        assertExcessData(b'abc=:', b'i\xb7')
-        assertExcessData(b'ab==\n', b'i')
-        assertExcessData(b'abc==', b'i\xb7')
-        assertExcessData(b'abc===', b'i\xb7')
-        assertExcessData(b'abc====', b'i\xb7')
-        assertExcessData(b'abc=====', b'i\xb7')
+        assertNonBase64Data(b'abc=:', b'i\xb7')
+        assertNonBase64Data(b'ab==\n', b'i')
+        assertExcessPadding(b'abc==', b'i\xb7')
+        assertExcessPadding(b'abc===', b'i\xb7')
+        assertExcessPadding(b'abc====', b'i\xb7')
+        assertExcessPadding(b'abc=====', b'i\xb7')
 
         # Test non-base64 data exceptions
         assertNonBase64Data(b'\nab==', b'i')
@@ -170,12 +170,45 @@ def assertExcessPadding(data, 
non_strict_mode_expected_result: bytes):
         assertLeadingPadding(b'=====', b'')
         assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
         assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
+        assertNonBase64Data(b'ab=:=', b'i')
         assertExcessPadding(b'abcd=', b'i\xb7\x1d')
         assertExcessPadding(b'abcd==', b'i\xb7\x1d')
         assertExcessPadding(b'abcd===', b'i\xb7\x1d')
         assertExcessPadding(b'abcd====', b'i\xb7\x1d')
         assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
 
+    def test_base64_invalidchars(self):
+        def assertNonBase64Data(data, expected, ignorechars):
+            data = self.type2test(data)
+            assert_regex = r'(?i)Only base64 data'
+            self.assertEqual(binascii.a2b_base64(data), expected)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, strict_mode=True)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, ignorechars=b'')
+            self.assertEqual(binascii.a2b_base64(data, 
ignorechars=ignorechars),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False, 
ignorechars=b''),
+                             expected)
+
+        assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
+        assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
+        assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
+        assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
+
+        data = self.type2test(b'a\nb==')
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars='')
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars=[])
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars=None)
 
     def test_base64errors(self):
         # Test base64 with invalid padding
diff --git 
a/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst 
b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
new file mode 100644
index 00000000000000..02d453f4d2ceee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
@@ -0,0 +1,2 @@
+Added the *ignorechars* parameter in :func:`binascii.a2b_base64` and
+:func:`base64.b64decode`.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index c569d3187f2e67..593b27ac5ede65 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -469,32 +469,45 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, 
int backtick)
     return PyBytesWriter_FinishWithPointer(writer, ascii_data);
 }
 
+
+static int
+ignorechar(unsigned char c, Py_buffer *ignorechars)
+{
+    return (ignorechars->buf != NULL &&
+            memchr(ignorechars->buf, c, ignorechars->len));
+}
+
 /*[clinic input]
-@permit_long_docstring_body
 binascii.a2b_base64
 
     data: ascii_buffer
     /
     *
-    strict_mode: bool = False
+    strict_mode: bool(c_default="-1", py_default="<unrepresentable>") = False
+        When set to true, bytes that are not part of the base64 standard are
+        not allowed.  The same applies to excess data after padding (= / ==).
+        Set to True by default if ignorechars is specified, False otherwise.
+    ignorechars: Py_buffer(py_default="<unrepresentable>") = None
+        A byte string containing characters to ignore from the input when
+        strict_mode is true.
 
 Decode a line of base64 data.
-
-  strict_mode
-    When set to True, bytes that are not part of the base64 standard are not 
allowed.
-    The same applies to excess data after padding (= / ==).
 [clinic start generated code]*/
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
-/*[clinic end generated code: output=5409557788d4f975 input=13c797187acc9c40]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+                         Py_buffer *ignorechars)
+/*[clinic end generated code: output=eab37aea4cfa6daa input=3be4937d72943835]*/
 {
     assert(data->len >= 0);
 
     const unsigned char *ascii_data = data->buf;
     size_t ascii_len = data->len;
     binascii_state *state = NULL;
-    char padding_started = 0;
+
+    if (strict_mode == -1) {
+        strict_mode = (ignorechars->buf != NULL);
+    }
 
     /* Allocate the buffer */
     Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later 
*/
@@ -504,14 +517,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer 
*data, int strict_mode)
     }
     unsigned char *bin_data = PyBytesWriter_GetData(writer);
 
-    if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
-        state = get_binascii_state(module);
-        if (state) {
-            PyErr_SetString(state->Error, "Leading padding not allowed");
-        }
-        goto error_end;
-    }
-
     size_t i = 0;  /* Current position in input */
 
     /* Fast path: use optimized decoder for complete quads.
@@ -538,36 +543,44 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer 
*data, int strict_mode)
         ** the invalid ones.
         */
         if (this_ch == BASE64_PAD) {
-            padding_started = 1;
+            pads++;
 
-            if (strict_mode && quad_pos == 0) {
-                state = get_binascii_state(module);
-                if (state) {
-                    PyErr_SetString(state->Error, "Excess padding not 
allowed");
+            if (strict_mode) {
+                if (quad_pos == 0) {
+                    state = get_binascii_state(module);
+                    if (state) {
+                        PyErr_SetString(state->Error, (i == 0)
+                            ? "Leading padding not allowed"
+                            : "Excess padding not allowed");
+                    }
+                    goto error_end;
                 }
-                goto error_end;
-            }
-            if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
-                /* A pad sequence means we should not parse more input.
-                ** We've already interpreted the data from the quad at this 
point.
-                ** in strict mode, an error should raise if there's excess 
data after the padding.
-                */
-                if (strict_mode && i + 1 < ascii_len) {
+                if (quad_pos == 1) {
+                    /* Set an error below. */
+                    break;
+                }
+                if (quad_pos + pads > 4) {
                     state = get_binascii_state(module);
                     if (state) {
-                        PyErr_SetString(state->Error, "Excess data after 
padding");
+                        PyErr_SetString(state->Error, "Excess padding not 
allowed");
                     }
                     goto error_end;
                 }
-
-                goto done;
+            }
+            else {
+                if (quad_pos >= 2 && quad_pos + pads >= 4) {
+                    /* A pad sequence means we should not parse more input.
+                    ** We've already interpreted the data from the quad at 
this point.
+                    */
+                    goto done;
+                }
             }
             continue;
         }
 
-        this_ch = table_a2b_base64[this_ch];
-        if (this_ch >= 64) {
-            if (strict_mode) {
+        unsigned char v = table_a2b_base64[this_ch];
+        if (v >= 64) {
+            if (strict_mode && !ignorechar(this_ch, ignorechars)) {
                 state = get_binascii_state(module);
                 if (state) {
                     PyErr_SetString(state->Error, "Only base64 data is 
allowed");
@@ -578,10 +591,12 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer 
*data, int strict_mode)
         }
 
         // Characters that are not '=', in the middle of the padding, are not 
allowed
-        if (strict_mode && padding_started) {
+        if (strict_mode && pads) {
             state = get_binascii_state(module);
             if (state) {
-                PyErr_SetString(state->Error, "Discontinuous padding not 
allowed");
+                PyErr_SetString(state->Error, (quad_pos + pads == 4)
+                    ? "Excess data after padding"
+                    : "Discontinuous padding not allowed");
             }
             goto error_end;
         }
@@ -590,44 +605,46 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer 
*data, int strict_mode)
         switch (quad_pos) {
             case 0:
                 quad_pos = 1;
-                leftchar = this_ch;
+                leftchar = v;
                 break;
             case 1:
                 quad_pos = 2;
-                *bin_data++ = (leftchar << 2) | (this_ch >> 4);
-                leftchar = this_ch & 0x0f;
+                *bin_data++ = (leftchar << 2) | (v >> 4);
+                leftchar = v & 0x0f;
                 break;
             case 2:
                 quad_pos = 3;
-                *bin_data++ = (leftchar << 4) | (this_ch >> 2);
-                leftchar = this_ch & 0x03;
+                *bin_data++ = (leftchar << 4) | (v >> 2);
+                leftchar = v & 0x03;
                 break;
             case 3:
                 quad_pos = 0;
-                *bin_data++ = (leftchar << 6) | (this_ch);
+                *bin_data++ = (leftchar << 6) | (v);
                 leftchar = 0;
                 break;
         }
     }
 
-    if (quad_pos != 0) {
+    if (quad_pos == 1) {
+        /* There is exactly one extra valid, non-padding, base64 character.
+        ** This is an invalid length, as there is no possible input that
+        ** could be encoded into such a base64 string.
+        */
         state = get_binascii_state(module);
-        if (state == NULL) {
-            /* error already set, from get_binascii_state */
-            assert(PyErr_Occurred());
-        } else if (quad_pos == 1) {
-            /*
-            ** There is exactly one extra valid, non-padding, base64 character.
-            ** This is an invalid length, as there is no possible input that
-            ** could encoded into such a base64 string.
-            */
+        if (state) {
             unsigned char *bin_data_start = PyBytesWriter_GetData(writer);
             PyErr_Format(state->Error,
                          "Invalid base64-encoded string: "
                          "number of data characters (%zd) cannot be 1 more "
                          "than a multiple of 4",
                          (bin_data - bin_data_start) / 3 * 4 + 1);
-        } else {
+        }
+        goto error_end;
+    }
+
+    if (quad_pos != 0 && quad_pos + pads != 4) {
+        state = get_binascii_state(module);
+        if (state) {
             PyErr_SetString(state->Error, "Incorrect padding");
         }
         goto error_end;
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index 524f5fc93d0c21..91325b1bdddf89 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -116,20 +116,26 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
 }
 
 PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /, *, strict_mode=False)\n"
+"a2b_base64($module, data, /, *, strict_mode=<unrepresentable>,\n"
+"           ignorechars=<unrepresentable>)\n"
 "--\n"
 "\n"
 "Decode a line of base64 data.\n"
 "\n"
 "  strict_mode\n"
-"    When set to True, bytes that are not part of the base64 standard are not allowed.\n"
-"    The same applies to excess data after padding (= / ==).");
+"    When set to true, bytes that are not part of the base64 standard are\n"
+"    not allowed.  The same applies to excess data after padding (= / ==).\n"
+"    Set to True by default if ignorechars is specified, False otherwise.\n"
+"  ignorechars\n"
+"    A byte string containing characters to ignore from the input when\n"
+"    strict_mode is true.");
 
 #define BINASCII_A2B_BASE64_METHODDEF    \
     {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__},
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+                         Py_buffer *ignorechars);
 
 static PyObject *
 binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -137,7 +143,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
 
-    #define NUM_KEYWORDS 1
+    #define NUM_KEYWORDS 2
     static struct {
         PyGC_Head _this_is_not_used;
         PyObject_VAR_HEAD
@@ -146,7 +152,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
         .ob_hash = -1,
-        .ob_item = { &_Py_ID(strict_mode), },
+        .ob_item = { &_Py_ID(strict_mode), &_Py_ID(ignorechars), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -155,17 +161,18 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
     #  define KWTUPLE NULL
     #endif  // !Py_BUILD_CORE
 
-    static const char * const _keywords[] = {"", "strict_mode", NULL};
+    static const char * const _keywords[] = {"", "strict_mode", "ignorechars", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
         .fname = "a2b_base64",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[2];
+    PyObject *argsbuf[3];
     Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
     Py_buffer data = {NULL, NULL};
-    int strict_mode = 0;
+    int strict_mode = -1;
+    Py_buffer ignorechars = {NULL, NULL};
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
             /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@@ -178,17 +185,29 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
     if (!noptargs) {
         goto skip_optional_kwonly;
     }
-    strict_mode = PyObject_IsTrue(args[1]);
-    if (strict_mode < 0) {
+    if (args[1]) {
+        strict_mode = PyObject_IsTrue(args[1]);
+        if (strict_mode < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) {
         goto exit;
     }
 skip_optional_kwonly:
-    return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
+    return_value = binascii_a2b_base64_impl(module, &data, strict_mode, &ignorechars);
 
 exit:
     /* Cleanup for data */
     if (data.obj)
        PyBuffer_Release(&data);
+    /* Cleanup for ignorechars */
+    if (ignorechars.obj) {
+       PyBuffer_Release(&ignorechars);
+    }
 
     return return_value;
 }
@@ -823,4 +842,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
 
     return return_value;
 }
-/*[clinic end generated code: output=644ccdc8e0d56e65 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=13f0a4b0f5d3fcb4 input=a9049054013a1b77]*/

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

[Python-checkins] gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() (GH-144024)

Reply via email to