https://github.com/python/cpython/commit/7febbe6b600e63544d5e7000cf377eeead858a39
commit: 7febbe6b600e63544d5e7000cf377eeead858a39
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-01-26T20:11:40+02:00
summary:
gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() (GH-144024)
files:
A Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
M Doc/library/base64.rst
M Doc/library/binascii.rst
M Doc/whatsnew/3.15.rst
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject_generated.h
M Lib/base64.py
M Lib/test/test_base64.py
M Lib/test/test_binascii.py
M Modules/binascii.c
M Modules/clinic/binascii.c.h
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 64d66fcf6bd50a..65b8aeaef8e939 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -73,6 +73,7 @@ POST request.
.. function:: b64decode(s, altchars=None, validate=False)
+ b64decode(s, altchars=None, validate=True, *, ignorechars)
Decode the Base64 encoded :term:`bytes-like object` or ASCII string
*s* and return the decoded :class:`bytes`.
@@ -84,11 +85,17 @@ POST request.
A :exc:`binascii.Error` exception is raised
if *s* is incorrectly padded.
- If *validate* is false (the default), characters that are neither
+ If *ignorechars* is specified, it should be a :term:`bytes-like object`
+ containing characters to ignore from the input when *validate* is true.
+ The default value of *validate* is ``True`` if *ignorechars* is specified,
+ ``False`` otherwise.
+
+ If *validate* is false, characters that are neither
in the normal base-64 alphabet nor the alternative alphabet are
discarded prior to the padding check, but the ``+`` and ``/`` characters
keep their meaning if they are not in *altchars* (they will be discarded
in future Python versions).
+
If *validate* is true, these non-alphabet characters in the input
result in a :exc:`binascii.Error`.
@@ -99,6 +106,10 @@ POST request.
is now deprecated.
+ .. versionchanged:: next
+ Added the *ignorechars* parameter.
+
+
.. function:: standard_b64encode(s)
Encode :term:`bytes-like object` *s* using the standard Base64 alphabet
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index eaf755711bc292..d9f0baedec85f2 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -49,10 +49,16 @@ The :mod:`binascii` module defines the following functions:
.. function:: a2b_base64(string, /, *, strict_mode=False)
+ a2b_base64(string, /, *, strict_mode=True, ignorechars)
Convert a block of base64 data back to binary and return the binary data. More
than one line may be passed at a time.
+ If *ignorechars* is specified, it should be a :term:`bytes-like object`
+ containing characters to ignore from the input when *strict_mode* is true.
+ The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
+ ``False`` otherwise.
+
If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
data will raise :exc:`binascii.Error`.
@@ -66,6 +72,9 @@ The :mod:`binascii` module defines the following functions:
.. versionchanged:: 3.11
Added the *strict_mode* parameter.
+ .. versionchanged:: next
+ Added the *ignorechars* parameter.
+
.. function:: b2a_base64(data, *, wrapcol=0, newline=True)
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index aec6b1ceea37cf..19c01b71f02fb6 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -444,6 +444,8 @@ base64
* Added the *wrapcol* parameter in :func:`~base64.b64encode`.
(Contributed by Serhiy Storchaka in :gh:`143214`.)
+* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
+ (Contributed by Serhiy Storchaka in :gh:`144001`.)
binascii
--------
@@ -451,6 +453,9 @@ binascii
* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
(Contributed by Serhiy Storchaka in :gh:`143214`.)
+* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
+ (Contributed by Serhiy Storchaka in :gh:`144001`.)
+
calendar
--------
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 4a5b2a925413bf..fc297a2933a786 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1797,6 +1797,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignorechars));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(implieslink));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 7c2f44ef6dbe7a..563ccd7cf6d3f4 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -520,6 +520,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(ident)
STRUCT_FOR_ID(identity_hint)
STRUCT_FOR_ID(ignore)
+ STRUCT_FOR_ID(ignorechars)
STRUCT_FOR_ID(imag)
STRUCT_FOR_ID(implieslink)
STRUCT_FOR_ID(importlib)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 6e7bad986dbeda..ba7c0e68434517 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1795,6 +1795,7 @@ extern "C" {
INIT_ID(ident), \
INIT_ID(identity_hint), \
INIT_ID(ignore), \
+ INIT_ID(ignorechars), \
INIT_ID(imag), \
INIT_ID(implieslink), \
INIT_ID(importlib), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 660115931da0a0..44063794293990 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1860,6 +1860,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp)
{
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(ignorechars);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(imag);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/base64.py b/Lib/base64.py
index 6e0da16b23ce99..6e9d24f0649320 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -26,6 +26,8 @@
]
+_NOT_SPECIFIED = ['NOT SPECIFIED']
+
bytes_types = (bytes, bytearray) # Types acceptable as binary data
def _bytes_from_decode_data(s):
@@ -62,7 +64,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
return encoded
-def b64decode(s, altchars=None, validate=False):
+def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *,
ignorechars=_NOT_SPECIFIED):
"""Decode the Base64 encoded bytes-like object or ASCII string s.
Optional altchars must be a bytes-like object or ASCII string of length 2
@@ -72,38 +74,64 @@ def b64decode(s, altchars=None, validate=False):
The result is returned as a bytes object. A binascii.Error is raised if
s is incorrectly padded.
- If validate is false (the default), characters that are neither in the
- normal base-64 alphabet nor the alternative alphabet are discarded prior
- to the padding check. If validate is true, these non-alphabet characters
- in the input result in a binascii.Error.
+ If ignorechars is specified, it should be a byte string containing
+ characters to ignore from the input. The default value of validate is
+ True if ignorechars is specified, False otherwise.
+
+ If validate is false, characters that are neither in the normal base-64
+ alphabet nor the alternative alphabet are discarded prior to the
+ padding check. If validate is true, these non-alphabet characters in
+ the input result in a binascii.Error if they are not in ignorechars.
For more information about the strict base64 check, see:
https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
"""
s = _bytes_from_decode_data(s)
+ if validate is _NOT_SPECIFIED:
+ validate = ignorechars is not _NOT_SPECIFIED
+ if ignorechars is _NOT_SPECIFIED:
+ ignorechars = b''
badchar = None
+ badchar_strict = False
if altchars is not None:
altchars = _bytes_from_decode_data(altchars)
if len(altchars) != 2:
raise ValueError(f'invalid altchars: {altchars!r}')
for b in b'+/':
if b not in altchars and b in s:
- badchar = b
- break
+ if badchar is None:
+ badchar = b
+ if not validate:
+ break
+ if not isinstance(ignorechars, (bytes, bytearray)):
+ ignorechars = memoryview(ignorechars).cast('B')
+ if b not in ignorechars:
+ badchar_strict = True
+ badchar = b
+ break
s = s.translate(bytes.maketrans(altchars, b'+/'))
- result = binascii.a2b_base64(s, strict_mode=validate)
+ result = binascii.a2b_base64(s, strict_mode=validate,
+ ignorechars=ignorechars)
if badchar is not None:
import warnings
- if validate:
+ if badchar_strict:
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
f'with altchars={altchars!r} and validate=True '
f'will be an error in future Python versions',
DeprecationWarning, stacklevel=2)
else:
- warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
- f'with altchars={altchars!r} and validate=False '
- f'will be discarded in future Python versions',
- FutureWarning, stacklevel=2)
+ ignorechars = bytes(ignorechars)
+ if ignorechars:
+ warnings.warn(f'invalid character {chr(badchar)!a} in Base64
data '
+ f'with altchars={altchars!r} '
+ f'and ignorechars={ignorechars!r} '
+ f'will be discarded in future Python versions',
+ FutureWarning, stacklevel=2)
+ else:
+ warnings.warn(f'invalid character {chr(badchar)!a} in Base64
data '
+ f'with altchars={altchars!r} and validate=False '
+ f'will be discarded in future Python versions',
+ FutureWarning, stacklevel=2)
return result
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 6e69ece8065ea2..5f7a41f53345d2 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -303,22 +303,26 @@ def test_b64decode_padding_error(self):
def test_b64decode_invalid_chars(self):
# issue 1466065: Test some invalid characters.
- tests = ((b'%3d==', b'\xdd'),
- (b'$3d==', b'\xdd'),
- (b'[==', b''),
- (b'YW]3=', b'am'),
- (b'3{d==', b'\xdd'),
- (b'3d}==', b'\xdd'),
- (b'@@', b''),
- (b'!', b''),
- (b"YWJj\n", b"abc"),
- (b'YWJj\nYWI=', b'abcab'))
+ tests = ((b'%3d==', b'\xdd', b'%$'),
+ (b'$3d==', b'\xdd', b'%$'),
+ (b'[==', b'', None),
+ (b'YW]3=', b'am', b']'),
+ (b'3{d==', b'\xdd', b'{}'),
+ (b'3d}==', b'\xdd', b'{}'),
+ (b'@@', b'', b'@!'),
+ (b'!', b'', b'@!'),
+ (b"YWJj\n", b"abc", b'\n'),
+ (b'YWJj\nYWI=', b'abcab', b'\n'),
+ (b'YW\nJj', b'abc', b'\n'),
+ (b'YW\nJj', b'abc', bytearray(b'\n')),
+ (b'YW\nJj', b'abc', memoryview(b'\n')),
+ )
funcs = (
base64.b64decode,
base64.standard_b64decode,
base64.urlsafe_b64decode,
)
- for bstr, res in tests:
+ for bstr, res, ignorechars in tests:
for func in funcs:
with self.subTest(bstr=bstr, func=func):
self.assertEqual(func(bstr), res)
@@ -327,24 +331,76 @@ def test_b64decode_invalid_chars(self):
base64.b64decode(bstr, validate=True)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr.decode('ascii'), validate=True)
+ with self.assertRaises(binascii.Error):
+ # Even empty ignorechars enables the strict mode.
+ base64.b64decode(bstr, ignorechars=b'')
+ if ignorechars is not None:
+ r = base64.b64decode(bstr, ignorechars=ignorechars)
+ self.assertEqual(r, res)
+
+ with self.assertRaises(TypeError):
+ base64.b64decode(b'', ignorechars='')
+ with self.assertRaises(TypeError):
+ base64.b64decode(b'', ignorechars=[])
+ with self.assertRaises(TypeError):
+ base64.b64decode(b'', ignorechars=None)
# Normal alphabet characters will be discarded when alternative given
- with self.assertWarns(FutureWarning):
- self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
- b'\xfb\xef\xbe')
- with self.assertWarns(FutureWarning):
- self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
- b'\xff\xff\xff')
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(base64.b64decode(b'++++', altchars=b'-_',
validate=True),
- b'\xfb\xef\xbe')
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(base64.b64decode(b'////', altchars=b'-_',
validate=True),
- b'\xff\xff\xff')
- with self.assertWarns(FutureWarning):
+ discarded = ("invalid character %a in Base64 data with %s "
+ "will be discarded in future Python versions")
+ error = ("invalid character %a in Base64 data with %s "
+ "will be an error in future Python versions")
+ with self.assertWarns(FutureWarning) as cm:
+ r = base64.b64decode(b'++++', altchars=b'-_')
+ self.assertEqual(r, b'\xfb\xef\xbe')
+ self.assertEqual(str(cm.warning),
+ discarded % ('+', "altchars=b'-_' and
validate=False"))
+ with self.assertWarns(FutureWarning) as cm:
+ r = base64.b64decode(b'////', altchars=b'-_')
+ self.assertEqual(r, b'\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ discarded % ('/', "altchars=b'-_' and
validate=False"))
+ with self.assertWarns(DeprecationWarning) as cm:
+ r = base64.b64decode(b'++++', altchars=b'-_', validate=True)
+ self.assertEqual(r, b'\xfb\xef\xbe')
+ self.assertEqual(str(cm.warning),
+ error % ('+', "altchars=b'-_' and validate=True"))
+ with self.assertWarns(DeprecationWarning) as cm:
+ r = base64.b64decode(b'////', altchars=b'-_', validate=True)
+ self.assertEqual(r, b'\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ error % ('/', "altchars=b'-_' and validate=True"))
+ with self.assertWarns(FutureWarning) as cm:
+ r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
+ self.assertEqual(r, b'\xfb\xef\xbe')
+ self.assertEqual(str(cm.warning),
+ discarded % ('+', "altchars=b'-_' and
ignorechars=b'+'"))
+ with self.assertWarns(FutureWarning) as cm:
+ r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
+ self.assertEqual(r, b'\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ discarded % ('/', "altchars=b'-_' and
ignorechars=b'/'"))
+ with self.assertWarns(DeprecationWarning) as cm:
+ r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'+')
+ self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ error % ('/', "altchars=b'-_' and validate=True"))
+ with self.assertWarns(DeprecationWarning) as cm:
+ r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'/')
+ self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ error % ('+', "altchars=b'-_' and validate=True"))
+
+ with self.assertWarns(FutureWarning) as cm:
self.assertEqual(base64.urlsafe_b64decode(b'++++'),
b'\xfb\xef\xbe')
- with self.assertWarns(FutureWarning):
+ self.assertEqual(str(cm.warning),
+ "invalid character '+' in URL-safe Base64 data "
+ "will be discarded in future Python versions")
+ with self.assertWarns(FutureWarning) as cm:
self.assertEqual(base64.urlsafe_b64decode(b'////'),
b'\xff\xff\xff')
+ self.assertEqual(str(cm.warning),
+ "invalid character '/' in URL-safe Base64 data "
+ "will be discarded in future Python versions")
with self.assertRaises(binascii.Error):
base64.b64decode(b'+/!', altchars=b'-_')
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 47e1e6ab035a17..4cfc332e89bea8 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -145,16 +145,16 @@ def assertExcessPadding(data,
non_strict_mode_expected_result: bytes):
# Test excess data exceptions
assertExcessData(b'ab==a', b'i')
- assertExcessData(b'ab===', b'i')
- assertExcessData(b'ab====', b'i')
- assertExcessData(b'ab==:', b'i')
+ assertExcessPadding(b'ab===', b'i')
+ assertExcessPadding(b'ab====', b'i')
+ assertNonBase64Data(b'ab==:', b'i')
assertExcessData(b'abc=a', b'i\xb7')
- assertExcessData(b'abc=:', b'i\xb7')
- assertExcessData(b'ab==\n', b'i')
- assertExcessData(b'abc==', b'i\xb7')
- assertExcessData(b'abc===', b'i\xb7')
- assertExcessData(b'abc====', b'i\xb7')
- assertExcessData(b'abc=====', b'i\xb7')
+ assertNonBase64Data(b'abc=:', b'i\xb7')
+ assertNonBase64Data(b'ab==\n', b'i')
+ assertExcessPadding(b'abc==', b'i\xb7')
+ assertExcessPadding(b'abc===', b'i\xb7')
+ assertExcessPadding(b'abc====', b'i\xb7')
+ assertExcessPadding(b'abc=====', b'i\xb7')
# Test non-base64 data exceptions
assertNonBase64Data(b'\nab==', b'i')
@@ -170,12 +170,45 @@ def assertExcessPadding(data,
non_strict_mode_expected_result: bytes):
assertLeadingPadding(b'=====', b'')
assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
+ assertNonBase64Data(b'ab=:=', b'i')
assertExcessPadding(b'abcd=', b'i\xb7\x1d')
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+ def test_base64_invalidchars(self):
+ def assertNonBase64Data(data, expected, ignorechars):
+ data = self.type2test(data)
+ assert_regex = r'(?i)Only base64 data'
+ self.assertEqual(binascii.a2b_base64(data), expected)
+ with self.assertRaisesRegex(binascii.Error, assert_regex):
+ binascii.a2b_base64(data, strict_mode=True)
+ with self.assertRaisesRegex(binascii.Error, assert_regex):
+ binascii.a2b_base64(data, ignorechars=b'')
+ self.assertEqual(binascii.a2b_base64(data,
ignorechars=ignorechars),
+ expected)
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=False,
ignorechars=b''),
+ expected)
+
+ assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n')
+ assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
+ assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
+ assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
+ assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
+ assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
+ assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
+ assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
+ assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
+ assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
+
+ data = self.type2test(b'a\nb==')
+ with self.assertRaises(TypeError):
+ binascii.a2b_base64(data, ignorechars='')
+ with self.assertRaises(TypeError):
+ binascii.a2b_base64(data, ignorechars=[])
+ with self.assertRaises(TypeError):
+ binascii.a2b_base64(data, ignorechars=None)
def test_base64errors(self):
# Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
new file mode 100644
index 00000000000000..02d453f4d2ceee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
@@ -0,0 +1,2 @@
+Added the *ignorechars* parameter in :func:`binascii.a2b_base64` and
+:func:`base64.b64decode`.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index c569d3187f2e67..593b27ac5ede65 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -469,32 +469,45 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data,
int backtick)
return PyBytesWriter_FinishWithPointer(writer, ascii_data);
}
+
+static int
+ignorechar(unsigned char c, Py_buffer *ignorechars)
+{
+ return (ignorechars->buf != NULL &&
+ memchr(ignorechars->buf, c, ignorechars->len));
+}
+
/*[clinic input]
-@permit_long_docstring_body
binascii.a2b_base64
data: ascii_buffer
/
*
- strict_mode: bool = False
+ strict_mode: bool(c_default="-1", py_default="<unrepresentable>") = False
+ When set to true, bytes that are not part of the base64 standard are
+ not allowed. The same applies to excess data after padding (= / ==).
+ Set to True by default if ignorechars is specified, False otherwise.
+ ignorechars: Py_buffer(py_default="<unrepresentable>") = None
+ A byte string containing characters to ignore from the input when
+ strict_mode is true.
Decode a line of base64 data.
-
- strict_mode
- When set to True, bytes that are not part of the base64 standard are not
allowed.
- The same applies to excess data after padding (= / ==).
[clinic start generated code]*/
static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
-/*[clinic end generated code: output=5409557788d4f975 input=13c797187acc9c40]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+ Py_buffer *ignorechars)
+/*[clinic end generated code: output=eab37aea4cfa6daa input=3be4937d72943835]*/
{
assert(data->len >= 0);
const unsigned char *ascii_data = data->buf;
size_t ascii_len = data->len;
binascii_state *state = NULL;
- char padding_started = 0;
+
+ if (strict_mode == -1) {
+ strict_mode = (ignorechars->buf != NULL);
+ }
/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later
*/
@@ -504,14 +517,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer
*data, int strict_mode)
}
unsigned char *bin_data = PyBytesWriter_GetData(writer);
- if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Leading padding not allowed");
- }
- goto error_end;
- }
-
size_t i = 0; /* Current position in input */
/* Fast path: use optimized decoder for complete quads.
@@ -538,36 +543,44 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer
*data, int strict_mode)
** the invalid ones.
*/
if (this_ch == BASE64_PAD) {
- padding_started = 1;
+ pads++;
- if (strict_mode && quad_pos == 0) {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Excess padding not
allowed");
+ if (strict_mode) {
+ if (quad_pos == 0) {
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error, (i == 0)
+ ? "Leading padding not allowed"
+ : "Excess padding not allowed");
+ }
+ goto error_end;
}
- goto error_end;
- }
- if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
- /* A pad sequence means we should not parse more input.
- ** We've already interpreted the data from the quad at this
point.
- ** in strict mode, an error should raise if there's excess
data after the padding.
- */
- if (strict_mode && i + 1 < ascii_len) {
+ if (quad_pos == 1) {
+ /* Set an error below. */
+ break;
+ }
+ if (quad_pos + pads > 4) {
state = get_binascii_state(module);
if (state) {
- PyErr_SetString(state->Error, "Excess data after
padding");
+ PyErr_SetString(state->Error, "Excess padding not
allowed");
}
goto error_end;
}
-
- goto done;
+ }
+ else {
+ if (quad_pos >= 2 && quad_pos + pads >= 4) {
+ /* A pad sequence means we should not parse more input.
+ ** We've already interpreted the data from the quad at
this point.
+ */
+ goto done;
+ }
}
continue;
}
- this_ch = table_a2b_base64[this_ch];
- if (this_ch >= 64) {
- if (strict_mode) {
+ unsigned char v = table_a2b_base64[this_ch];
+ if (v >= 64) {
+ if (strict_mode && !ignorechar(this_ch, ignorechars)) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is
allowed");
@@ -578,10 +591,12 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer
*data, int strict_mode)
}
// Characters that are not '=', in the middle of the padding, are not
allowed
- if (strict_mode && padding_started) {
+ if (strict_mode && pads) {
state = get_binascii_state(module);
if (state) {
- PyErr_SetString(state->Error, "Discontinuous padding not
allowed");
+ PyErr_SetString(state->Error, (quad_pos + pads == 4)
+ ? "Excess data after padding"
+ : "Discontinuous padding not allowed");
}
goto error_end;
}
@@ -590,44 +605,46 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer
*data, int strict_mode)
switch (quad_pos) {
case 0:
quad_pos = 1;
- leftchar = this_ch;
+ leftchar = v;
break;
case 1:
quad_pos = 2;
- *bin_data++ = (leftchar << 2) | (this_ch >> 4);
- leftchar = this_ch & 0x0f;
+ *bin_data++ = (leftchar << 2) | (v >> 4);
+ leftchar = v & 0x0f;
break;
case 2:
quad_pos = 3;
- *bin_data++ = (leftchar << 4) | (this_ch >> 2);
- leftchar = this_ch & 0x03;
+ *bin_data++ = (leftchar << 4) | (v >> 2);
+ leftchar = v & 0x03;
break;
case 3:
quad_pos = 0;
- *bin_data++ = (leftchar << 6) | (this_ch);
+ *bin_data++ = (leftchar << 6) | (v);
leftchar = 0;
break;
}
}
- if (quad_pos != 0) {
+ if (quad_pos == 1) {
+ /* There is exactly one extra valid, non-padding, base64 character.
+ ** This is an invalid length, as there is no possible input that
+ ** could encoded into such a base64 string.
+ */
state = get_binascii_state(module);
- if (state == NULL) {
- /* error already set, from get_binascii_state */
- assert(PyErr_Occurred());
- } else if (quad_pos == 1) {
- /*
- ** There is exactly one extra valid, non-padding, base64 character.
- ** This is an invalid length, as there is no possible input that
- ** could encoded into such a base64 string.
- */
+ if (state) {
unsigned char *bin_data_start = PyBytesWriter_GetData(writer);
PyErr_Format(state->Error,
"Invalid base64-encoded string: "
"number of data characters (%zd) cannot be 1 more "
"than a multiple of 4",
(bin_data - bin_data_start) / 3 * 4 + 1);
- } else {
+ }
+ goto error_end;
+ }
+
+ if (quad_pos != 0 && quad_pos + pads != 4) {
+ state = get_binascii_state(module);
+ if (state) {
PyErr_SetString(state->Error, "Incorrect padding");
}
goto error_end;
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index 524f5fc93d0c21..91325b1bdddf89 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -116,20 +116,26 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args,
Py_ssize_t nargs, PyObj
}
PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /, *, strict_mode=False)\n"
+"a2b_base64($module, data, /, *, strict_mode=<unrepresentable>,\n"
+" ignorechars=<unrepresentable>)\n"
"--\n"
"\n"
"Decode a line of base64 data.\n"
"\n"
" strict_mode\n"
-" When set to True, bytes that are not part of the base64 standard are not
allowed.\n"
-" The same applies to excess data after padding (= / ==).");
+" When set to true, bytes that are not part of the base64 standard are\n"
+" not allowed. The same applies to excess data after padding (= / ==).\n"
+" Set to True by default if ignorechars is specified, False otherwise.\n"
+" ignorechars\n"
+" A byte string containing characters to ignore from the input when\n"
+" strict_mode is true.");
#define BINASCII_A2B_BASE64_METHODDEF \
{"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64),
METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__},
static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+ Py_buffer *ignorechars);
static PyObject *
binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
PyObject *kwnames)
@@ -137,7 +143,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
- #define NUM_KEYWORDS 1
+ #define NUM_KEYWORDS 2
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
@@ -146,7 +152,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
- .ob_item = { &_Py_ID(strict_mode), },
+ .ob_item = { &_Py_ID(strict_mode), &_Py_ID(ignorechars), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -155,17 +161,18 @@ binascii_a2b_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
- static const char * const _keywords[] = {"", "strict_mode", NULL};
+ static const char * const _keywords[] = {"", "strict_mode", "ignorechars",
NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "a2b_base64",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
- PyObject *argsbuf[2];
+ PyObject *argsbuf[3];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) -
1;
Py_buffer data = {NULL, NULL};
- int strict_mode = 0;
+ int strict_mode = -1;
+ Py_buffer ignorechars = {NULL, NULL};
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@@ -178,17 +185,29 @@ binascii_a2b_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
if (!noptargs) {
goto skip_optional_kwonly;
}
- strict_mode = PyObject_IsTrue(args[1]);
- if (strict_mode < 0) {
+ if (args[1]) {
+ strict_mode = PyObject_IsTrue(args[1]);
+ if (strict_mode < 0) {
+ goto exit;
+ }
+ if (!--noptargs) {
+ goto skip_optional_kwonly;
+ }
+ }
+ if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) {
goto exit;
}
skip_optional_kwonly:
- return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
+ return_value = binascii_a2b_base64_impl(module, &data, strict_mode,
&ignorechars);
exit:
/* Cleanup for data */
if (data.obj)
PyBuffer_Release(&data);
+ /* Cleanup for ignorechars */
+ if (ignorechars.obj) {
+ PyBuffer_Release(&ignorechars);
+ }
return return_value;
}
@@ -823,4 +842,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args,
Py_ssize_t nargs, PyObj
return return_value;
}
-/*[clinic end generated code: output=644ccdc8e0d56e65 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=13f0a4b0f5d3fcb4 input=a9049054013a1b77]*/
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]