Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-106939, gh-145261: Fix ShareableList data corruption#145488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
jakelodwick wants to merge 1 commit into python:main
base:main
Choose a base branch
Loading
from jakelodwick:fix-shareablelist-corruption
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions Doc/library/multiprocessing.shared_memory.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -311,28 +311,34 @@ finishes execution.
existing :class:`!ShareableList`, specify its shared memory block's unique
name while leaving *sequence* set to ``None``.

.. versionchanged:: 3.15
Trailing null bytes (``\x00``) in :class:`bytes` and :class:`str` values
are now preserved correctly, and multi-byte UTF-8 strings are no longer
corrupted. See :gh:`106939` and :gh:`145261`.

.. note::

A known issue exists for :class:`bytes` and :class:`str` values.
If they end with ``\x00`` nul bytes or characters, those may be
*silently stripped* when fetching them by index from the
:class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is
considered a bug and may go away in the future. See :gh:`106939`.
In Python 3.14 and earlier, :class:`bytes` and :class:`str` values
ending with ``\x00`` nul bytes had those bytes *silently stripped*
when fetched by index from the :class:`!ShareableList`. Multi-byte
UTF-8 strings could also be corrupted due to incorrect slot sizing.

For applications where rstripping of trailing nulls is a problem,
work around it by always unconditionally appending an extra non-0
byte to the end of such values when storing and unconditionally
removing it when fetching:
For applications that need to work with Python 3.14 and earlier where
rstripping of trailing nulls is a problem, work around it by always
unconditionally appending an extra non-0 byte to the end of such values
when storing and unconditionally removing it when fetching:

.. doctest::

>>> from multiprocessing import shared_memory
>>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00'])
>>> nul_bug_demo[0]
'?'
>>> nul_bug_demo[1]
b'\x03\x02\x01'
>>> nul_bug_demo.shm.unlink()
>>> # Python 3.15+: trailing nulls are preserved
>>> sl = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00'])
>>> sl[0]
'?\x00'
>>> sl[1]
b'\x03\x02\x01\x00\x00\x00'
>>> sl.shm.unlink()
>>> # Workaround for Python 3.14 and earlier:
>>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07'])
>>> padded[0][:-1]
'?\x00'
Expand Down
38 changes: 26 additions & 12 deletions Lib/multiprocessing/shared_memory.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -286,9 +286,9 @@ class ShareableList:
_alignment=8
_back_transforms_mapping= {
0:lambdavalue:value,# int, float, bool
1:lambdavalue:value.rstrip(b'\x00').decode(_encoding),# str
2:lambdavalue:value.rstrip(b'\x00'),# bytes
3:lambda_value:None,# None
1:lambdavalue:value.decode(_encoding),# str
2:lambdavalue:value,# bytes
3:lambda_value:None,# None
}

@staticmethod
Expand All@@ -312,7 +312,13 @@ def __init__(self, sequence=None, *, name=None):
self._types_mapping[type(item)]
ifnotisinstance(item, (str,bytes))
elseself._types_mapping[type(item)]% (
self._alignment* (len(item)//self._alignment+1),
self._alignment* (
len(
item.encode(_encoding)
ifisinstance(item,str)
elseitem
)//self._alignment+1
),
)
foriteminsequence
]
Expand All@@ -326,6 +332,15 @@ def __init__(self, sequence=None, *, name=None):
forfmtin_formats:
offset+=self._alignmentiffmt[-1]!="s"elseint(fmt[:-1])
self._allocated_offsets.append(offset)
_stored_formats= []
foritem,fmtinzip(sequence,_formats):
ifisinstance(item, (str,bytes)):
encoded= (item.encode(_encoding)
ifisinstance(item,str)elseitem)
_stored_formats.append("%ds"%len(encoded))
else:
_stored_formats.append(fmt)

_recreation_codes= [
self._extract_recreation_code(item)foriteminsequence
]
Expand DownExpand Up@@ -359,7 +374,7 @@ def __init__(self, sequence=None, *, name=None):
self._format_packing_metainfo,
self.shm.buf,
self._offset_packing_formats,
*(v.encode(_enc)forvin_formats)
*(v.encode(_enc)forvin_stored_formats)
)
struct.pack_into(
self._format_back_transform_codes,
Expand DownExpand Up@@ -459,6 +474,7 @@ def __setitem__(self, position, value):

ifnotisinstance(value, (str,bytes)):
new_format=self._types_mapping[type(value)]
pack_format=new_format
encoded_value=value
else:
allocated_length=self._allocated_offsets[position+1]-item_offset
Expand All@@ -467,19 +483,17 @@ def __setitem__(self, position, value):
ifisinstance(value,str)elsevalue)
iflen(encoded_value)>allocated_length:
raiseValueError("bytes/str item exceeds available storage")
ifcurrent_format[-1]=="s":
new_format=current_format
else:
new_format=self._types_mapping[str]% (
allocated_length,
)
# Allocated-length format for struct.pack_into (fills the slot).
pack_format="%ds"%allocated_length
# Actual-length format stored in metadata (for exact retrieval).
new_format="%ds"%len(encoded_value)

self._set_packing_format_and_transform(
position,
new_format,
value
)
struct.pack_into(new_format,self.shm.buf,offset,encoded_value)
struct.pack_into(pack_format,self.shm.buf,offset,encoded_value)

def__reduce__(self):
returnpartial(self.__class__,name=self.shm.name), ()
Expand Down
56 changes: 54 additions & 2 deletions Lib/test/_test_multiprocessing.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(current_format,sl._get_packing_format(0))

# Verify attributes are readable.
self.assertEqual(sl.format,'8s8sdqxxxxxx?xxxxxxxx?q')
self.assertEqual(sl.format,'5s5sdqxxxxxx?xxxxxxxx?q')

# Exercise len().
self.assertEqual(len(sl),7)
Expand DownExpand Up@@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(sl[3],42)
sl[4]='some'# Change type at a given position.
self.assertEqual(sl[4],'some')
self.assertEqual(sl.format,'8s8sdq8sxxxxxxx?q')
self.assertEqual(sl.format,'5s5sdq4sxxxxxxx?q')
withself.assertRaisesRegex(ValueError,
"exceeds available storage"):
sl[4]='far too many'
Expand DownExpand Up@@ -4887,6 +4887,58 @@ def test_shared_memory_ShareableList_pickling_dead_object(self):
withself.assertRaises(FileNotFoundError):
pickle.loads(serialized_sl)

deftest_shared_memory_ShareableList_trailing_nulls(self):
# gh-106939: ShareableList should preserve trailing null bytes
# in bytes and str values.
sl=shared_memory.ShareableList([
b'\x03\x02\x01\x00\x00\x00',
'?\x00',
b'\x00\x00\x00',
b'',
b'no nulls',
])
self.addCleanup(sl.shm.unlink)
self.addCleanup(sl.shm.close)

self.assertEqual(sl[0],b'\x03\x02\x01\x00\x00\x00')
self.assertEqual(sl[1],'?\x00')
self.assertEqual(sl[2],b'\x00\x00\x00')
self.assertEqual(sl[3],b'')
self.assertEqual(sl[4],b'no nulls')

sl2=shared_memory.ShareableList(name=sl.shm.name)
self.addCleanup(sl2.shm.close)
self.assertEqual(sl2[0],b'\x03\x02\x01\x00\x00\x00')
self.assertEqual(sl2[1],'?\x00')
self.assertEqual(sl2[2],b'\x00\x00\x00')
self.assertEqual(sl2[3],b'')
self.assertEqual(sl2[4],b'no nulls')

deftest_shared_memory_ShareableList_multibyte_utf8(self):
# gh-145261: ShareableList should correctly handle multi-byte
# UTF-8 strings without corruption or spillage.
sl=shared_memory.ShareableList([
'ascii',# 1-byte per char (5 bytes)
'café',# 2-byte char: é (5 bytes)
'中文测试',# 3-byte per char (12 bytes)
'𐀀𐀁',# 4-byte per char (8 bytes)
])
self.addCleanup(sl.shm.unlink)
self.addCleanup(sl.shm.close)

self.assertEqual(sl[0],'ascii')
self.assertEqual(sl[1],'café')
self.assertEqual(sl[2],'中文测试')
self.assertEqual(sl[3],'𐀀𐀁')

# Verify cross-process access via name-based attachment.
sl2=shared_memory.ShareableList(name=sl.shm.name)
self.addCleanup(sl2.shm.close)
self.assertEqual(sl2[0],'ascii')
self.assertEqual(sl2[1],'café')
self.assertEqual(sl2[2],'中文测试')
self.assertEqual(sl2[3],'𐀀𐀁')

deftest_shared_memory_cleaned_after_process_termination(self):
cmd='''if 1:
import os, time, sys
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting
multi-byte UTF-8 strings due to using character count instead of byte count
for slot allocation, and stripping legitimate trailing null bytes from
:class:`bytes` and :class:`str` values.
Loading

[8]ページ先頭

©2009-2026 Movatter.jp