Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-144264: Speed up Base64 decoding of data containing ignored characters #144265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
serhiy-storchaka wants to merge 2 commits into python:main
base:main
Choose a base branch
Loading
from serhiy-storchaka:binascii-a2b_base64-ignorechars-speedup
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions in Lib/test/test_binascii.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -202,6 +202,17 @@ def assertNonBase64Data(data, expected, ignorechars):
assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))

# Same byte in the bit cache: '\r' >> 3 == '\n' >> 3.
data = self.type2test(b'\r\n')
with self.assertRaises(binascii.Error):
binascii.a2b_base64(data, ignorechars=b'\r')
self.assertEqual(binascii.a2b_base64(data, ignorechars=b'\r\n'), b'')
# Same mask in the bit cache: ':' & 7 == '\n' & 7.
data = self.type2test(b':\n')
with self.assertRaises(binascii.Error):
binascii.a2b_base64(data, ignorechars=b':')
self.assertEqual(binascii.a2b_base64(data, ignorechars=b':\n'), b'')

data = self.type2test(b'a\nb==')
with self.assertRaises(TypeError):
binascii.a2b_base64(data, ignorechars='')
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
Speed up Base64 decoding of data containing ignored characters (both in
non-strict mode and with an explicit *ignorechars* argument).
It is now up to 2 times faster for multiline Base64 data.
37 changes: 26 additions & 11 deletions in Modules/binascii.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -471,10 +471,19 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)


static int
ignorechar(unsigned char c, Py_buffer *ignorechars)
ignorechar(unsigned char c, Py_buffer *ignorechars, char ignorecache[32])
{
return (ignorechars->buf != NULL &&
memchr(ignorechars->buf, c, ignorechars->len));
if (ignorechars->buf == NULL) {
return 0;
}
if (ignorecache[c >> 3] & (1 << (c & 7))) {
return 1;
}
if (memchr(ignorechars->buf, c, ignorechars->len)) {
ignorecache[c >> 3] |= 1 << (c & 7);
return 1;
}
return 0;
}

/*[clinic input]
Expand DownExpand Up@@ -508,6 +517,10 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
if (strict_mode == -1) {
strict_mode = (ignorechars->buf != NULL);
}
char ignorecache[32];
if (strict_mode && ignorechars->buf != NULL) {
memset(ignorecache, 0, sizeof(ignorecache));
}

/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Expand All@@ -517,8 +530,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
}
unsigned char *bin_data = PyBytesWriter_GetData(writer);

size_t i = 0; /* Current position in input */

fastpath:
/* Fast path: use optimized decoder for complete quads.
* This works for both strict and non-strict mode for valid input.
* The fast path stops at padding, invalid chars, or incomplete groups.
Expand All@@ -527,7 +539,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
Py_ssize_t fast_chars = base64_decode_fast(ascii_data, (Py_ssize_t)ascii_len,
bin_data, table_a2b_base64);
if (fast_chars > 0) {
i = (size_t)fast_chars;
ascii_data += fast_chars;
ascii_len -= fast_chars;
bin_data += (fast_chars / 4) * 3;
}
}
Expand All@@ -536,8 +549,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
int quad_pos = 0;
unsigned char leftchar = 0;
int pads = 0;
for (;i <ascii_len;i++) {
unsigned char this_ch = ascii_data[i];
for (; ascii_len;ascii_data++, ascii_len--) {
unsigned char this_ch =*ascii_data;

/* Check for pad sequences and ignore
** the invalid ones.
Expand All@@ -549,7 +562,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
if (quad_pos == 0) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, (i ==0)
PyErr_SetString(state->Error, (ascii_data ==data->buf)
? "Leading padding not allowed"
: "Excess padding not allowed");
}
Expand DownExpand Up@@ -580,7 +593,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,

unsigned char v = table_a2b_base64[this_ch];
if (v >= 64) {
if (strict_mode && !ignorechar(this_ch, ignorechars)) {
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is allowed");
Expand DownExpand Up@@ -621,7 +634,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
quad_pos = 0;
*bin_data++ = (leftchar << 6) | (v);
leftchar = 0;
break;
ascii_data++;
ascii_len--;
goto fastpath;
}
}

Expand Down
Loading

[8]ページ先頭

©2009-2026 Movatter.jp