Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5aec2d2

Browse files
miss-islington, sidney, serhiy-storchaka
authored
[3.11] gh-94606: Fix error when message with Unicode surrogate not surrogateescaped string (GH-94641) (GH-112972)
(cherry picked from commit 27a5fd8) Co-authored-by: Sidney Markowitz <sidney@sidney.com> Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent a37e147, commit 5aec2d2

File tree

4 files changed

+49
-16
lines changed

4 files changed

+49
-16
lines changed

‎Lib/email/message.py‎

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -289,25 +289,26 @@ def get_payload(self, i=None, decode=False):
289289
# cte might be a Header, so for now stringify it.
290290
cte=str(self.get('content-transfer-encoding','')).lower()
291291
# payload may be bytes here.
292-
ifisinstance(payload,str):
293-
ifutils._has_surrogates(payload):
294-
bpayload=payload.encode('ascii','surrogateescape')
295-
ifnotdecode:
292+
ifnotdecode:
293+
ifisinstance(payload,str)andutils._has_surrogates(payload):
294+
try:
295+
bpayload=payload.encode('ascii','surrogateescape')
296296
try:
297297
payload=bpayload.decode(self.get_param('charset','ascii'),'replace')
298298
exceptLookupError:
299299
payload=bpayload.decode('ascii','replace')
300-
elifdecode:
301-
try:
302-
bpayload=payload.encode('ascii')
303-
exceptUnicodeError:
304-
# This won't happen for RFC compliant messages (messages
305-
# containing only ASCII code points in the unicode input).
306-
# If it does happen, turn the string into bytes in a way
307-
# guaranteed not to fail.
308-
bpayload=payload.encode('raw-unicode-escape')
309-
ifnotdecode:
300+
exceptUnicodeEncodeError:
301+
pass
310302
returnpayload
303+
ifisinstance(payload,str):
304+
try:
305+
bpayload=payload.encode('ascii','surrogateescape')
306+
exceptUnicodeEncodeError:
307+
# This won't happen for RFC compliant messages (messages
308+
# containing only ASCII code points in the unicode input).
309+
# If it does happen, turn the string into bytes in a way
310+
# guaranteed not to fail.
311+
bpayload=payload.encode('raw-unicode-escape')
311312
ifcte=='quoted-printable':
312313
returnquopri.decodestring(bpayload)
313314
elifcte=='base64':

‎Lib/email/utils.py‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,10 @@
4949
escapesre=re.compile(r'[\\"]')
5050

5151
def_has_surrogates(s):
52-
"""Return True if scontains surrogate-escaped binary data."""
52+
"""Return True if smay contain surrogate-escaped binary data."""
5353
# This check is based on the fact that unless there are surrogates, utf8
5454
# (Python's default encoding) can encode any string. This is the fastest
55-
# way to check for surrogates, see issue 11454 for timings.
55+
# way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings.
5656
try:
5757
s.encode()
5858
returnFalse

‎Lib/test/test_email/test_message.py‎

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -748,6 +748,35 @@ def test_iter_attachments_mutation(self):
748748
self.assertEqual(len(list(m.iter_attachments())),2)
749749
self.assertEqual(m.get_payload(),orig)
750750

751+
get_payload_surrogate_params= {
752+
753+
'good_surrogateescape': (
754+
"String that can be encod\udcc3\udcabd with surrogateescape",
755+
b'String that can be encod\xc3\xabd with surrogateescape'
756+
),
757+
758+
'string_with_utf8': (
759+
"String with utf-8 charactër",
760+
b'String with utf-8 charact\xebr'
761+
),
762+
763+
'surrogate_and_utf8': (
764+
"String that cannot be ëncod\udcc3\udcabd with surrogateescape",
765+
b'String that cannot be\xebncod\\udcc3\\udcabd with surrogateescape'
766+
),
767+
768+
'out_of_range_surrogate': (
769+
"String with\udfff cannot be encoded with surrogateescape",
770+
b'String with\\udfff cannot be encoded with surrogateescape'
771+
),
772+
}
773+
774+
defget_payload_surrogate_as_gh_94606(self,msg,expected):
775+
"""test for GH issue 94606"""
776+
m=self._str_msg(msg)
777+
payload=m.get_payload(decode=True)
778+
self.assertEqual(expected,payload)
779+
751780

752781
classTestEmailMessage(TestEmailMessageBase,TestEmailBase):
753782
message=EmailMessage
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix UnicodeEncodeError when :func:`email.message.get_payload` reads a message
2+
with a Unicode surrogate character and the message content is not well-formed for
3+
surrogateescape encoding. Patch by Sidney Markowitz.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp